You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

parser.go 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. // Copyright 2015 Unknwon
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License"): you may
  4. // not use this file except in compliance with the License. You may obtain
  5. // a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  11. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  12. // License for the specific language governing permissions and limitations
  13. // under the License.
  14. package ini
  15. import (
  16. "bufio"
  17. "bytes"
  18. "fmt"
  19. "io"
  20. "regexp"
  21. "strconv"
  22. "strings"
  23. "unicode"
  24. )
  // pythonMultiline matches one indented continuation line of a Python-style
  // multi-line value. Group 1 captures the leading whitespace and group 2 the
  // remainder of the line, excluding the line break.
  var pythonMultiline = regexp.MustCompile(`^(\s+)([^\n]+)`)
  // parserOptions carries the switches that control how the parser reads
  // values.
  type parserOptions struct {
  	// IgnoreContinuation disables joining of lines that end with a backslash.
  	IgnoreContinuation bool
  	// IgnoreInlineComment keeps '#' and ';' inside a value instead of
  	// splitting them off as a comment.
  	IgnoreInlineComment bool
  	// AllowPythonMultilineValues lets indented follow-up lines extend a value.
  	AllowPythonMultilineValues bool
  	// SpaceBeforeInlineComment requires a space in front of '#' or ';' for
  	// it to start an inline comment.
  	SpaceBeforeInlineComment bool
  	// UnescapeValueDoubleQuotes treats a leading '"' as a value quote and
  	// turns \" into " inside the quoted value.
  	UnescapeValueDoubleQuotes bool
  	// UnescapeValueCommentSymbols turns \; and \# into ; and # in values.
  	UnescapeValueCommentSymbols bool
  	// PreserveSurroundedQuote keeps a value's surrounding single or double
  	// quotes instead of trimming them.
  	PreserveSurroundedQuote bool
  }
  35. type parser struct {
  36. buf *bufio.Reader
  37. options parserOptions
  38. isEOF bool
  39. count int
  40. comment *bytes.Buffer
  41. }
  42. func newParser(r io.Reader, opts parserOptions) *parser {
  43. return &parser{
  44. buf: bufio.NewReader(r),
  45. options: opts,
  46. count: 1,
  47. comment: &bytes.Buffer{},
  48. }
  49. }
  50. // BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
  51. // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
  52. func (p *parser) BOM() error {
  53. mask, err := p.buf.Peek(2)
  54. if err != nil && err != io.EOF {
  55. return err
  56. } else if len(mask) < 2 {
  57. return nil
  58. }
  59. switch {
  60. case mask[0] == 254 && mask[1] == 255:
  61. fallthrough
  62. case mask[0] == 255 && mask[1] == 254:
  63. p.buf.Read(mask)
  64. case mask[0] == 239 && mask[1] == 187:
  65. mask, err := p.buf.Peek(3)
  66. if err != nil && err != io.EOF {
  67. return err
  68. } else if len(mask) < 3 {
  69. return nil
  70. }
  71. if mask[2] == 191 {
  72. p.buf.Read(mask)
  73. }
  74. }
  75. return nil
  76. }
  77. func (p *parser) readUntil(delim byte) ([]byte, error) {
  78. data, err := p.buf.ReadBytes(delim)
  79. if err != nil {
  80. if err == io.EOF {
  81. p.isEOF = true
  82. } else {
  83. return nil, err
  84. }
  85. }
  86. return data, nil
  87. }
  // cleanComment returns the part of in that starts at the first '#' or ';'
  // and reports whether such a comment marker was found. Without a marker it
  // returns nil and false.
  func cleanComment(in []byte) ([]byte, bool) {
  	if start := bytes.IndexAny(in, "#;"); start >= 0 {
  		return in[start:], true
  	}
  	return nil, false
  }
  95. func readKeyName(delimiters string, in []byte) (string, int, error) {
  96. line := string(in)
  97. // Check if key name surrounded by quotes.
  98. var keyQuote string
  99. if line[0] == '"' {
  100. if len(line) > 6 && string(line[0:3]) == `"""` {
  101. keyQuote = `"""`
  102. } else {
  103. keyQuote = `"`
  104. }
  105. } else if line[0] == '`' {
  106. keyQuote = "`"
  107. }
  108. // Get out key name
  109. endIdx := -1
  110. if len(keyQuote) > 0 {
  111. startIdx := len(keyQuote)
  112. // FIXME: fail case -> """"""name"""=value
  113. pos := strings.Index(line[startIdx:], keyQuote)
  114. if pos == -1 {
  115. return "", -1, fmt.Errorf("missing closing key quote: %s", line)
  116. }
  117. pos += startIdx
  118. // Find key-value delimiter
  119. i := strings.IndexAny(line[pos+startIdx:], delimiters)
  120. if i < 0 {
  121. return "", -1, ErrDelimiterNotFound{line}
  122. }
  123. endIdx = pos + i
  124. return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
  125. }
  126. endIdx = strings.IndexAny(line, delimiters)
  127. if endIdx < 0 {
  128. return "", -1, ErrDelimiterNotFound{line}
  129. }
  130. return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
  131. }
  132. func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
  133. for {
  134. data, err := p.readUntil('\n')
  135. if err != nil {
  136. return "", err
  137. }
  138. next := string(data)
  139. pos := strings.LastIndex(next, valQuote)
  140. if pos > -1 {
  141. val += next[:pos]
  142. comment, has := cleanComment([]byte(next[pos:]))
  143. if has {
  144. p.comment.Write(bytes.TrimSpace(comment))
  145. }
  146. break
  147. }
  148. val += next
  149. if p.isEOF {
  150. return "", fmt.Errorf("missing closing key quote from '%s' to '%s'", line, next)
  151. }
  152. }
  153. return val, nil
  154. }
  155. func (p *parser) readContinuationLines(val string) (string, error) {
  156. for {
  157. data, err := p.readUntil('\n')
  158. if err != nil {
  159. return "", err
  160. }
  161. next := strings.TrimSpace(string(data))
  162. if len(next) == 0 {
  163. break
  164. }
  165. val += next
  166. if val[len(val)-1] != '\\' {
  167. break
  168. }
  169. val = val[:len(val)-1]
  170. }
  171. return val, nil
  172. }
  // hasSurroundedQuote reports whether in starts and ends with quote and
  // contains no other occurrence of quote in between. Strings shorter than
  // two bytes never qualify.
  func hasSurroundedQuote(in string, quote byte) bool {
  	last := len(in) - 1
  	if last < 1 || in[0] != quote || in[last] != quote {
  		return false
  	}
  	// The first quote after the opening one must be the closing one.
  	return strings.IndexByte(in[1:], quote) == last-1
  }
  180. func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
  181. line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
  182. if len(line) == 0 {
  183. if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
  184. return p.readPythonMultilines(line, bufferSize)
  185. }
  186. return "", nil
  187. }
  188. var valQuote string
  189. if len(line) > 3 && string(line[0:3]) == `"""` {
  190. valQuote = `"""`
  191. } else if line[0] == '`' {
  192. valQuote = "`"
  193. } else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
  194. valQuote = `"`
  195. }
  196. if len(valQuote) > 0 {
  197. startIdx := len(valQuote)
  198. pos := strings.LastIndex(line[startIdx:], valQuote)
  199. // Check for multi-line value
  200. if pos == -1 {
  201. return p.readMultilines(line, line[startIdx:], valQuote)
  202. }
  203. if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
  204. return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
  205. }
  206. return line[startIdx : pos+startIdx], nil
  207. }
  208. lastChar := line[len(line)-1]
  209. // Won't be able to reach here if value only contains whitespace
  210. line = strings.TrimSpace(line)
  211. trimmedLastChar := line[len(line)-1]
  212. // Check continuation lines when desired
  213. if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
  214. return p.readContinuationLines(line[:len(line)-1])
  215. }
  216. // Check if ignore inline comment
  217. if !p.options.IgnoreInlineComment {
  218. var i int
  219. if p.options.SpaceBeforeInlineComment {
  220. i = strings.Index(line, " #")
  221. if i == -1 {
  222. i = strings.Index(line, " ;")
  223. }
  224. } else {
  225. i = strings.IndexAny(line, "#;")
  226. }
  227. if i > -1 {
  228. p.comment.WriteString(line[i:])
  229. line = strings.TrimSpace(line[:i])
  230. }
  231. }
  232. // Trim single and double quotes
  233. if (hasSurroundedQuote(line, '\'') ||
  234. hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
  235. line = line[1 : len(line)-1]
  236. } else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
  237. if strings.Contains(line, `\;`) {
  238. line = strings.Replace(line, `\;`, ";", -1)
  239. }
  240. if strings.Contains(line, `\#`) {
  241. line = strings.Replace(line, `\#`, "#", -1)
  242. }
  243. } else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
  244. return p.readPythonMultilines(line, bufferSize)
  245. }
  246. return line, nil
  247. }
  248. func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
  249. parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
  250. peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
  251. for {
  252. peekData, peekErr := peekBuffer.ReadBytes('\n')
  253. if peekErr != nil {
  254. if peekErr == io.EOF {
  255. return line, nil
  256. }
  257. return "", peekErr
  258. }
  259. peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
  260. if len(peekMatches) != 3 {
  261. return line, nil
  262. }
  263. // NOTE: Return if not a python-ini multi-line value.
  264. currentIdentSize := len(peekMatches[1])
  265. if currentIdentSize <= 0 {
  266. return line, nil
  267. }
  268. // NOTE: Just advance the parser reader (buffer) in-sync with the peek buffer.
  269. _, err := p.readUntil('\n')
  270. if err != nil {
  271. return "", err
  272. }
  273. line += fmt.Sprintf("\n%s", peekMatches[2])
  274. }
  275. }
  276. // parse parses data through an io.Reader.
  277. func (f *File) parse(reader io.Reader) (err error) {
  278. p := newParser(reader, parserOptions{
  279. IgnoreContinuation: f.options.IgnoreContinuation,
  280. IgnoreInlineComment: f.options.IgnoreInlineComment,
  281. AllowPythonMultilineValues: f.options.AllowPythonMultilineValues,
  282. SpaceBeforeInlineComment: f.options.SpaceBeforeInlineComment,
  283. UnescapeValueDoubleQuotes: f.options.UnescapeValueDoubleQuotes,
  284. UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
  285. PreserveSurroundedQuote: f.options.PreserveSurroundedQuote,
  286. })
  287. if err = p.BOM(); err != nil {
  288. return fmt.Errorf("BOM: %v", err)
  289. }
  290. // Ignore error because default section name is never empty string.
  291. name := DefaultSection
  292. if f.options.Insensitive {
  293. name = strings.ToLower(DefaultSection)
  294. }
  295. section, _ := f.NewSection(name)
  296. // This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
  297. var isLastValueEmpty bool
  298. var lastRegularKey *Key
  299. var line []byte
  300. var inUnparseableSection bool
  301. // NOTE: Iterate and increase `currentPeekSize` until
  302. // the size of the parser buffer is found.
  303. // TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
  304. parserBufferSize := 0
  305. // NOTE: Peek 1kb at a time.
  306. currentPeekSize := 1024
  307. if f.options.AllowPythonMultilineValues {
  308. for {
  309. peekBytes, _ := p.buf.Peek(currentPeekSize)
  310. peekBytesLength := len(peekBytes)
  311. if parserBufferSize >= peekBytesLength {
  312. break
  313. }
  314. currentPeekSize *= 2
  315. parserBufferSize = peekBytesLength
  316. }
  317. }
  318. for !p.isEOF {
  319. line, err = p.readUntil('\n')
  320. if err != nil {
  321. return err
  322. }
  323. if f.options.AllowNestedValues &&
  324. isLastValueEmpty && len(line) > 0 {
  325. if line[0] == ' ' || line[0] == '\t' {
  326. lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
  327. continue
  328. }
  329. }
  330. line = bytes.TrimLeftFunc(line, unicode.IsSpace)
  331. if len(line) == 0 {
  332. continue
  333. }
  334. // Comments
  335. if line[0] == '#' || line[0] == ';' {
  336. // Note: we do not care ending line break,
  337. // it is needed for adding second line,
  338. // so just clean it once at the end when set to value.
  339. p.comment.Write(line)
  340. continue
  341. }
  342. // Section
  343. if line[0] == '[' {
  344. // Read to the next ']' (TODO: support quoted strings)
  345. closeIdx := bytes.LastIndexByte(line, ']')
  346. if closeIdx == -1 {
  347. return fmt.Errorf("unclosed section: %s", line)
  348. }
  349. name := string(line[1:closeIdx])
  350. section, err = f.NewSection(name)
  351. if err != nil {
  352. return err
  353. }
  354. comment, has := cleanComment(line[closeIdx+1:])
  355. if has {
  356. p.comment.Write(comment)
  357. }
  358. section.Comment = strings.TrimSpace(p.comment.String())
  359. // Reset aotu-counter and comments
  360. p.comment.Reset()
  361. p.count = 1
  362. inUnparseableSection = false
  363. for i := range f.options.UnparseableSections {
  364. if f.options.UnparseableSections[i] == name ||
  365. (f.options.Insensitive && strings.ToLower(f.options.UnparseableSections[i]) == strings.ToLower(name)) {
  366. inUnparseableSection = true
  367. continue
  368. }
  369. }
  370. continue
  371. }
  372. if inUnparseableSection {
  373. section.isRawSection = true
  374. section.rawBody += string(line)
  375. continue
  376. }
  377. kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
  378. if err != nil {
  379. // Treat as boolean key when desired, and whole line is key name.
  380. if IsErrDelimiterNotFound(err) {
  381. switch {
  382. case f.options.AllowBooleanKeys:
  383. kname, err := p.readValue(line, parserBufferSize)
  384. if err != nil {
  385. return err
  386. }
  387. key, err := section.NewBooleanKey(kname)
  388. if err != nil {
  389. return err
  390. }
  391. key.Comment = strings.TrimSpace(p.comment.String())
  392. p.comment.Reset()
  393. continue
  394. case f.options.SkipUnrecognizableLines:
  395. continue
  396. }
  397. }
  398. return err
  399. }
  400. // Auto increment.
  401. isAutoIncr := false
  402. if kname == "-" {
  403. isAutoIncr = true
  404. kname = "#" + strconv.Itoa(p.count)
  405. p.count++
  406. }
  407. value, err := p.readValue(line[offset:], parserBufferSize)
  408. if err != nil {
  409. return err
  410. }
  411. isLastValueEmpty = len(value) == 0
  412. key, err := section.NewKey(kname, value)
  413. if err != nil {
  414. return err
  415. }
  416. key.isAutoIncrement = isAutoIncr
  417. key.Comment = strings.TrimSpace(p.comment.String())
  418. p.comment.Reset()
  419. lastRegularKey = key
  420. }
  421. return nil
  422. }