You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

parser.go 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. // Copyright 2015 Unknwon
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License"): you may
  4. // not use this file except in compliance with the License. You may obtain
  5. // a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  11. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  12. // License for the specific language governing permissions and limitations
  13. // under the License.
  14. package ini
  15. import (
  16. "bufio"
  17. "bytes"
  18. "fmt"
  19. "io"
  20. "regexp"
  21. "strconv"
  22. "strings"
  23. "unicode"
  24. )
  // minReaderBufferSize is the smallest buffer size, in bytes, that newParser
  // passes to bufio.NewReaderSize; smaller requested sizes are raised to it.
  const minReaderBufferSize = 4096

  // pythonMultiline matches a line that starts with one or more tabs, form
  // feeds or spaces. Submatch 1 is that leading indent and submatch 2 is the
  // remainder of the line up to (not including) the line break.
  var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`)
  27. type parserOptions struct {
  28. IgnoreContinuation bool
  29. IgnoreInlineComment bool
  30. AllowPythonMultilineValues bool
  31. SpaceBeforeInlineComment bool
  32. UnescapeValueDoubleQuotes bool
  33. UnescapeValueCommentSymbols bool
  34. PreserveSurroundedQuote bool
  35. DebugFunc DebugFunc
  36. ReaderBufferSize int
  37. }
  38. type parser struct {
  39. buf *bufio.Reader
  40. options parserOptions
  41. isEOF bool
  42. count int
  43. comment *bytes.Buffer
  44. }
  45. func (p *parser) debug(format string, args ...interface{}) {
  46. if p.options.DebugFunc != nil {
  47. p.options.DebugFunc(fmt.Sprintf(format, args...))
  48. }
  49. }
  50. func newParser(r io.Reader, opts parserOptions) *parser {
  51. size := opts.ReaderBufferSize
  52. if size < minReaderBufferSize {
  53. size = minReaderBufferSize
  54. }
  55. return &parser{
  56. buf: bufio.NewReaderSize(r, size),
  57. options: opts,
  58. count: 1,
  59. comment: &bytes.Buffer{},
  60. }
  61. }
  62. // BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
  63. // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
  64. func (p *parser) BOM() error {
  65. mask, err := p.buf.Peek(2)
  66. if err != nil && err != io.EOF {
  67. return err
  68. } else if len(mask) < 2 {
  69. return nil
  70. }
  71. switch {
  72. case mask[0] == 254 && mask[1] == 255:
  73. fallthrough
  74. case mask[0] == 255 && mask[1] == 254:
  75. _, err = p.buf.Read(mask)
  76. if err != nil {
  77. return err
  78. }
  79. case mask[0] == 239 && mask[1] == 187:
  80. mask, err := p.buf.Peek(3)
  81. if err != nil && err != io.EOF {
  82. return err
  83. } else if len(mask) < 3 {
  84. return nil
  85. }
  86. if mask[2] == 191 {
  87. _, err = p.buf.Read(mask)
  88. if err != nil {
  89. return err
  90. }
  91. }
  92. }
  93. return nil
  94. }
  95. func (p *parser) readUntil(delim byte) ([]byte, error) {
  96. data, err := p.buf.ReadBytes(delim)
  97. if err != nil {
  98. if err == io.EOF {
  99. p.isEOF = true
  100. } else {
  101. return nil, err
  102. }
  103. }
  104. return data, nil
  105. }
  // cleanComment returns the part of in that starts at the first '#' or ';'
  // and true. If in contains neither symbol it returns nil and false.
  func cleanComment(in []byte) ([]byte, bool) {
  	if start := bytes.IndexAny(in, "#;"); start >= 0 {
  		return in[start:], true
  	}
  	return nil, false
  }
  113. func readKeyName(delimiters string, in []byte) (string, int, error) {
  114. line := string(in)
  115. // Check if key name surrounded by quotes.
  116. var keyQuote string
  117. if line[0] == '"' {
  118. if len(line) > 6 && string(line[0:3]) == `"""` {
  119. keyQuote = `"""`
  120. } else {
  121. keyQuote = `"`
  122. }
  123. } else if line[0] == '`' {
  124. keyQuote = "`"
  125. }
  126. // Get out key name
  127. var endIdx int
  128. if len(keyQuote) > 0 {
  129. startIdx := len(keyQuote)
  130. // FIXME: fail case -> """"""name"""=value
  131. pos := strings.Index(line[startIdx:], keyQuote)
  132. if pos == -1 {
  133. return "", -1, fmt.Errorf("missing closing key quote: %s", line)
  134. }
  135. pos += startIdx
  136. // Find key-value delimiter
  137. i := strings.IndexAny(line[pos+startIdx:], delimiters)
  138. if i < 0 {
  139. return "", -1, ErrDelimiterNotFound{line}
  140. }
  141. endIdx = pos + i
  142. return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
  143. }
  144. endIdx = strings.IndexAny(line, delimiters)
  145. if endIdx < 0 {
  146. return "", -1, ErrDelimiterNotFound{line}
  147. }
  148. return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
  149. }
  150. func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
  151. for {
  152. data, err := p.readUntil('\n')
  153. if err != nil {
  154. return "", err
  155. }
  156. next := string(data)
  157. pos := strings.LastIndex(next, valQuote)
  158. if pos > -1 {
  159. val += next[:pos]
  160. comment, has := cleanComment([]byte(next[pos:]))
  161. if has {
  162. p.comment.Write(bytes.TrimSpace(comment))
  163. }
  164. break
  165. }
  166. val += next
  167. if p.isEOF {
  168. return "", fmt.Errorf("missing closing key quote from %q to %q", line, next)
  169. }
  170. }
  171. return val, nil
  172. }
  173. func (p *parser) readContinuationLines(val string) (string, error) {
  174. for {
  175. data, err := p.readUntil('\n')
  176. if err != nil {
  177. return "", err
  178. }
  179. next := strings.TrimSpace(string(data))
  180. if len(next) == 0 {
  181. break
  182. }
  183. val += next
  184. if val[len(val)-1] != '\\' {
  185. break
  186. }
  187. val = val[:len(val)-1]
  188. }
  189. return val, nil
  190. }
  // hasSurroundedQuote reports whether in starts and ends with quote and
  // contains no other occurrence of quote in between. Strings shorter than
  // two bytes are never considered surrounded.
  func hasSurroundedQuote(in string, quote byte) bool {
  	n := len(in)
  	if n < 2 || in[0] != quote || in[n-1] != quote {
  		return false
  	}
  	// The first quote after the opening one must be the closing one.
  	return strings.IndexByte(in[1:], quote) == n-2
  }
  198. func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
  199. line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
  200. if len(line) == 0 {
  201. if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
  202. return p.readPythonMultilines(line, bufferSize)
  203. }
  204. return "", nil
  205. }
  206. var valQuote string
  207. if len(line) > 3 && string(line[0:3]) == `"""` {
  208. valQuote = `"""`
  209. } else if line[0] == '`' {
  210. valQuote = "`"
  211. } else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
  212. valQuote = `"`
  213. }
  214. if len(valQuote) > 0 {
  215. startIdx := len(valQuote)
  216. pos := strings.LastIndex(line[startIdx:], valQuote)
  217. // Check for multi-line value
  218. if pos == -1 {
  219. return p.readMultilines(line, line[startIdx:], valQuote)
  220. }
  221. if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
  222. return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
  223. }
  224. return line[startIdx : pos+startIdx], nil
  225. }
  226. lastChar := line[len(line)-1]
  227. // Won't be able to reach here if value only contains whitespace
  228. line = strings.TrimSpace(line)
  229. trimmedLastChar := line[len(line)-1]
  230. // Check continuation lines when desired
  231. if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
  232. return p.readContinuationLines(line[:len(line)-1])
  233. }
  234. // Check if ignore inline comment
  235. if !p.options.IgnoreInlineComment {
  236. var i int
  237. if p.options.SpaceBeforeInlineComment {
  238. i = strings.Index(line, " #")
  239. if i == -1 {
  240. i = strings.Index(line, " ;")
  241. }
  242. } else {
  243. i = strings.IndexAny(line, "#;")
  244. }
  245. if i > -1 {
  246. p.comment.WriteString(line[i:])
  247. line = strings.TrimSpace(line[:i])
  248. }
  249. }
  250. // Trim single and double quotes
  251. if (hasSurroundedQuote(line, '\'') ||
  252. hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
  253. line = line[1 : len(line)-1]
  254. } else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
  255. if strings.Contains(line, `\;`) {
  256. line = strings.Replace(line, `\;`, ";", -1)
  257. }
  258. if strings.Contains(line, `\#`) {
  259. line = strings.Replace(line, `\#`, "#", -1)
  260. }
  261. } else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
  262. return p.readPythonMultilines(line, bufferSize)
  263. }
  264. return line, nil
  265. }
  266. func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
  267. parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
  268. peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
  269. indentSize := 0
  270. for {
  271. peekData, peekErr := peekBuffer.ReadBytes('\n')
  272. if peekErr != nil {
  273. if peekErr == io.EOF {
  274. p.debug("readPythonMultilines: io.EOF, peekData: %q, line: %q", string(peekData), line)
  275. return line, nil
  276. }
  277. p.debug("readPythonMultilines: failed to peek with error: %v", peekErr)
  278. return "", peekErr
  279. }
  280. p.debug("readPythonMultilines: parsing %q", string(peekData))
  281. peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
  282. p.debug("readPythonMultilines: matched %d parts", len(peekMatches))
  283. for n, v := range peekMatches {
  284. p.debug(" %d: %q", n, v)
  285. }
  286. // Return if not a Python multiline value.
  287. if len(peekMatches) != 3 {
  288. p.debug("readPythonMultilines: end of value, got: %q", line)
  289. return line, nil
  290. }
  291. // Determine indent size and line prefix.
  292. currentIndentSize := len(peekMatches[1])
  293. if indentSize < 1 {
  294. indentSize = currentIndentSize
  295. p.debug("readPythonMultilines: indent size is %d", indentSize)
  296. }
  297. // Make sure each line is indented at least as far as first line.
  298. if currentIndentSize < indentSize {
  299. p.debug("readPythonMultilines: end of value, current indent: %d, expected indent: %d, line: %q", currentIndentSize, indentSize, line)
  300. return line, nil
  301. }
  302. // Advance the parser reader (buffer) in-sync with the peek buffer.
  303. _, err := p.buf.Discard(len(peekData))
  304. if err != nil {
  305. p.debug("readPythonMultilines: failed to skip to the end, returning error")
  306. return "", err
  307. }
  308. // Handle indented empty line.
  309. line += "\n" + peekMatches[1][indentSize:] + peekMatches[2]
  310. }
  311. }
  312. // parse parses data through an io.Reader.
  313. func (f *File) parse(reader io.Reader) (err error) {
  314. p := newParser(reader, parserOptions{
  315. IgnoreContinuation: f.options.IgnoreContinuation,
  316. IgnoreInlineComment: f.options.IgnoreInlineComment,
  317. AllowPythonMultilineValues: f.options.AllowPythonMultilineValues,
  318. SpaceBeforeInlineComment: f.options.SpaceBeforeInlineComment,
  319. UnescapeValueDoubleQuotes: f.options.UnescapeValueDoubleQuotes,
  320. UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
  321. PreserveSurroundedQuote: f.options.PreserveSurroundedQuote,
  322. DebugFunc: f.options.DebugFunc,
  323. ReaderBufferSize: f.options.ReaderBufferSize,
  324. })
  325. if err = p.BOM(); err != nil {
  326. return fmt.Errorf("BOM: %v", err)
  327. }
  328. // Ignore error because default section name is never empty string.
  329. name := DefaultSection
  330. if f.options.Insensitive || f.options.InsensitiveSections {
  331. name = strings.ToLower(DefaultSection)
  332. }
  333. section, _ := f.NewSection(name)
  334. // This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
  335. var isLastValueEmpty bool
  336. var lastRegularKey *Key
  337. var line []byte
  338. var inUnparseableSection bool
  339. // NOTE: Iterate and increase `currentPeekSize` until
  340. // the size of the parser buffer is found.
  341. // TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
  342. parserBufferSize := 0
  343. // NOTE: Peek 4kb at a time.
  344. currentPeekSize := minReaderBufferSize
  345. if f.options.AllowPythonMultilineValues {
  346. for {
  347. peekBytes, _ := p.buf.Peek(currentPeekSize)
  348. peekBytesLength := len(peekBytes)
  349. if parserBufferSize >= peekBytesLength {
  350. break
  351. }
  352. currentPeekSize *= 2
  353. parserBufferSize = peekBytesLength
  354. }
  355. }
  356. for !p.isEOF {
  357. line, err = p.readUntil('\n')
  358. if err != nil {
  359. return err
  360. }
  361. if f.options.AllowNestedValues &&
  362. isLastValueEmpty && len(line) > 0 {
  363. if line[0] == ' ' || line[0] == '\t' {
  364. err = lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
  365. if err != nil {
  366. return err
  367. }
  368. continue
  369. }
  370. }
  371. line = bytes.TrimLeftFunc(line, unicode.IsSpace)
  372. if len(line) == 0 {
  373. continue
  374. }
  375. // Comments
  376. if line[0] == '#' || line[0] == ';' {
  377. // Note: we do not care ending line break,
  378. // it is needed for adding second line,
  379. // so just clean it once at the end when set to value.
  380. p.comment.Write(line)
  381. continue
  382. }
  383. // Section
  384. if line[0] == '[' {
  385. // Read to the next ']' (TODO: support quoted strings)
  386. closeIdx := bytes.LastIndexByte(line, ']')
  387. if closeIdx == -1 {
  388. return fmt.Errorf("unclosed section: %s", line)
  389. }
  390. name := string(line[1:closeIdx])
  391. section, err = f.NewSection(name)
  392. if err != nil {
  393. return err
  394. }
  395. comment, has := cleanComment(line[closeIdx+1:])
  396. if has {
  397. p.comment.Write(comment)
  398. }
  399. section.Comment = strings.TrimSpace(p.comment.String())
  400. // Reset auto-counter and comments
  401. p.comment.Reset()
  402. p.count = 1
  403. inUnparseableSection = false
  404. for i := range f.options.UnparseableSections {
  405. if f.options.UnparseableSections[i] == name ||
  406. ((f.options.Insensitive || f.options.InsensitiveSections) && strings.EqualFold(f.options.UnparseableSections[i], name)) {
  407. inUnparseableSection = true
  408. continue
  409. }
  410. }
  411. continue
  412. }
  413. if inUnparseableSection {
  414. section.isRawSection = true
  415. section.rawBody += string(line)
  416. continue
  417. }
  418. kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
  419. if err != nil {
  420. // Treat as boolean key when desired, and whole line is key name.
  421. if IsErrDelimiterNotFound(err) {
  422. switch {
  423. case f.options.AllowBooleanKeys:
  424. kname, err := p.readValue(line, parserBufferSize)
  425. if err != nil {
  426. return err
  427. }
  428. key, err := section.NewBooleanKey(kname)
  429. if err != nil {
  430. return err
  431. }
  432. key.Comment = strings.TrimSpace(p.comment.String())
  433. p.comment.Reset()
  434. continue
  435. case f.options.SkipUnrecognizableLines:
  436. continue
  437. }
  438. }
  439. return err
  440. }
  441. // Auto increment.
  442. isAutoIncr := false
  443. if kname == "-" {
  444. isAutoIncr = true
  445. kname = "#" + strconv.Itoa(p.count)
  446. p.count++
  447. }
  448. value, err := p.readValue(line[offset:], parserBufferSize)
  449. if err != nil {
  450. return err
  451. }
  452. isLastValueEmpty = len(value) == 0
  453. key, err := section.NewKey(kname, value)
  454. if err != nil {
  455. return err
  456. }
  457. key.isAutoIncrement = isAutoIncr
  458. key.Comment = strings.TrimSpace(p.comment.String())
  459. p.comment.Reset()
  460. lastRegularKey = key
  461. }
  462. return nil
  463. }