You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

RawParseUtils.java 39KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338
  1. /*
  2. * Copyright (C) 2008-2009, Google Inc.
  3. * Copyright (C) 2006-2008, Shawn O. Pearce <spearce@spearce.org>
  4. * and other copyright owners as documented in the project's IP log.
  5. *
  6. * This program and the accompanying materials are made available
  7. * under the terms of the Eclipse Distribution License v1.0 which
  8. * accompanies this distribution, is reproduced below, and is
  9. * available at http://www.eclipse.org/org/documents/edl-v10.php
  10. *
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. *
  20. * - Redistributions in binary form must reproduce the above
  21. * copyright notice, this list of conditions and the following
  22. * disclaimer in the documentation and/or other materials provided
  23. * with the distribution.
  24. *
  25. * - Neither the name of the Eclipse Foundation, Inc. nor the
  26. * names of its contributors may be used to endorse or promote
  27. * products derived from this software without specific prior
  28. * written permission.
  29. *
  30. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. */
  44. package org.eclipse.jgit.util;
  45. import static java.nio.charset.StandardCharsets.ISO_8859_1;
  46. import static java.nio.charset.StandardCharsets.UTF_8;
  47. import static org.eclipse.jgit.lib.ObjectChecker.author;
  48. import static org.eclipse.jgit.lib.ObjectChecker.committer;
  49. import static org.eclipse.jgit.lib.ObjectChecker.encoding;
  50. import static org.eclipse.jgit.lib.ObjectChecker.tagger;
  51. import java.nio.ByteBuffer;
  52. import java.nio.charset.CharacterCodingException;
  53. import java.nio.charset.Charset;
  54. import java.nio.charset.CharsetDecoder;
  55. import java.nio.charset.CodingErrorAction;
  56. import java.nio.charset.IllegalCharsetNameException;
  57. import java.nio.charset.UnsupportedCharsetException;
  58. import java.util.Arrays;
  59. import java.util.HashMap;
  60. import java.util.Map;
  61. import org.eclipse.jgit.annotations.Nullable;
  62. import org.eclipse.jgit.errors.BinaryBlobException;
  63. import org.eclipse.jgit.lib.Constants;
  64. import org.eclipse.jgit.lib.PersonIdent;
  65. /**
  66. * Handy utility functions to parse raw object contents.
  67. */
  68. public final class RawParseUtils {
  /**
   * UTF-8 charset constant.
   *
   * @since 2.2
   */
  public static final Charset UTF8_CHARSET = UTF_8;

  /**
   * Lookup table from an ASCII decimal digit to its value. The table is
   * {@code '9' + 1} entries long; every slot that is not a digit holds -1.
   */
  private static final byte[] digits10;

  /**
   * Lookup table from an ASCII hex digit (either case) to its value. The
   * table is {@code 'f' + 1} entries long; every slot that is not a hex digit
   * holds -1.
   */
  private static final byte[] digits16;

  /**
   * Flag table, {@code 'z' + 1} entries long, holding 1 for {@code '-'},
   * ASCII digits and ASCII letters, and 0 for everything else. Presumably the
   * set of characters permitted in a footer line key; the consumer is not
   * visible in this part of the file.
   */
  private static final byte[] footerLineKeyChars;

  /** Additional encoding names mapped onto character sets. */
  private static final Map<String, Charset> encodingAliases;

  static {
    final Map<String, Charset> aliases = new HashMap<>();
    aliases.put("latin-1", ISO_8859_1); //$NON-NLS-1$
    aliases.put("iso-latin-1", ISO_8859_1); //$NON-NLS-1$
    encodingAliases = aliases;

    final byte[] dec = new byte['9' + 1];
    Arrays.fill(dec, (byte) -1);
    for (int n = 0; n < 10; n++) {
      dec['0' + n] = (byte) n;
    }
    digits10 = dec;

    final byte[] hex = new byte['f' + 1];
    Arrays.fill(hex, (byte) -1);
    for (int n = 0; n < 10; n++) {
      hex['0' + n] = (byte) n;
    }
    for (int n = 0; n < 6; n++) {
      hex['a' + n] = (byte) (10 + n);
      hex['A' + n] = (byte) (10 + n);
    }
    digits16 = hex;

    final byte[] keyChars = new byte['z' + 1];
    keyChars['-'] = 1;
    for (int c = '0'; c <= '9'; c++) {
      keyChars[c] = 1;
    }
    for (int n = 0; n < 26; n++) {
      keyChars['A' + n] = 1;
      keyChars['a' + n] = 1;
    }
    footerLineKeyChars = keyChars;
  }
  104. /**
  105. * Determine if b[ptr] matches src.
  106. *
  107. * @param b
  108. * the buffer to scan.
  109. * @param ptr
  110. * first position within b, this should match src[0].
  111. * @param src
  112. * the buffer to test for equality with b.
  113. * @return ptr + src.length if b[ptr..src.length] == src; else -1.
  114. */
  115. public static final int match(byte[] b, int ptr, byte[] src) {
  116. if (ptr + src.length > b.length)
  117. return -1;
  118. for (int i = 0; i < src.length; i++, ptr++)
  119. if (b[ptr] != src[i])
  120. return -1;
  121. return ptr;
  122. }
  123. private static final byte[] base10byte = { '0', '1', '2', '3', '4', '5',
  124. '6', '7', '8', '9' };
  125. /**
  126. * Format a base 10 numeric into a temporary buffer.
  127. * <p>
  128. * Formatting is performed backwards. The method starts at offset
  129. * <code>o-1</code> and ends at <code>o-1-digits</code>, where
  130. * <code>digits</code> is the number of positions necessary to store the
  131. * base 10 value.
  132. * <p>
  133. * The argument and return values from this method make it easy to chain
  134. * writing, for example:
  135. * </p>
  136. *
  137. * <pre>
  138. * final byte[] tmp = new byte[64];
  139. * int ptr = tmp.length;
  140. * tmp[--ptr] = '\n';
  141. * ptr = RawParseUtils.formatBase10(tmp, ptr, 32);
  142. * tmp[--ptr] = ' ';
  143. * ptr = RawParseUtils.formatBase10(tmp, ptr, 18);
  144. * tmp[--ptr] = 0;
  145. * final String str = new String(tmp, ptr, tmp.length - ptr);
  146. * </pre>
  147. *
  148. * @param b
  149. * buffer to write into.
  150. * @param o
  151. * one offset past the location where writing will begin; writing
  152. * proceeds towards lower index values.
  153. * @param value
  154. * the value to store.
  155. * @return the new offset value <code>o</code>. This is the position of
  156. * the last byte written. Additional writing should start at one
  157. * position earlier.
  158. */
  159. public static int formatBase10(final byte[] b, int o, int value) {
  160. if (value == 0) {
  161. b[--o] = '0';
  162. return o;
  163. }
  164. final boolean isneg = value < 0;
  165. if (isneg)
  166. value = -value;
  167. while (value != 0) {
  168. b[--o] = base10byte[value % 10];
  169. value /= 10;
  170. }
  171. if (isneg)
  172. b[--o] = '-';
  173. return o;
  174. }
  175. /**
  176. * Parse a base 10 numeric from a sequence of ASCII digits into an int.
  177. * <p>
  178. * Digit sequences can begin with an optional run of spaces before the
  179. * sequence, and may start with a '+' or a '-' to indicate sign position.
  180. * Any other characters will cause the method to stop and return the current
  181. * result to the caller.
  182. *
  183. * @param b
  184. * buffer to scan.
  185. * @param ptr
  186. * position within buffer to start parsing digits at.
  187. * @param ptrResult
  188. * optional location to return the new ptr value through. If null
  189. * the ptr value will be discarded.
  190. * @return the value at this location; 0 if the location is not a valid
  191. * numeric.
  192. */
  193. public static final int parseBase10(final byte[] b, int ptr,
  194. final MutableInteger ptrResult) {
  195. int r = 0;
  196. int sign = 0;
  197. try {
  198. final int sz = b.length;
  199. while (ptr < sz && b[ptr] == ' ')
  200. ptr++;
  201. if (ptr >= sz)
  202. return 0;
  203. switch (b[ptr]) {
  204. case '-':
  205. sign = -1;
  206. ptr++;
  207. break;
  208. case '+':
  209. ptr++;
  210. break;
  211. }
  212. while (ptr < sz) {
  213. final byte v = digits10[b[ptr]];
  214. if (v < 0)
  215. break;
  216. r = (r * 10) + v;
  217. ptr++;
  218. }
  219. } catch (ArrayIndexOutOfBoundsException e) {
  220. // Not a valid digit.
  221. }
  222. if (ptrResult != null)
  223. ptrResult.value = ptr;
  224. return sign < 0 ? -r : r;
  225. }
  226. /**
  227. * Parse a base 10 numeric from a sequence of ASCII digits into a long.
  228. * <p>
  229. * Digit sequences can begin with an optional run of spaces before the
  230. * sequence, and may start with a '+' or a '-' to indicate sign position.
  231. * Any other characters will cause the method to stop and return the current
  232. * result to the caller.
  233. *
  234. * @param b
  235. * buffer to scan.
  236. * @param ptr
  237. * position within buffer to start parsing digits at.
  238. * @param ptrResult
  239. * optional location to return the new ptr value through. If null
  240. * the ptr value will be discarded.
  241. * @return the value at this location; 0 if the location is not a valid
  242. * numeric.
  243. */
  244. public static final long parseLongBase10(final byte[] b, int ptr,
  245. final MutableInteger ptrResult) {
  246. long r = 0;
  247. int sign = 0;
  248. try {
  249. final int sz = b.length;
  250. while (ptr < sz && b[ptr] == ' ')
  251. ptr++;
  252. if (ptr >= sz)
  253. return 0;
  254. switch (b[ptr]) {
  255. case '-':
  256. sign = -1;
  257. ptr++;
  258. break;
  259. case '+':
  260. ptr++;
  261. break;
  262. }
  263. while (ptr < sz) {
  264. final byte v = digits10[b[ptr]];
  265. if (v < 0)
  266. break;
  267. r = (r * 10) + v;
  268. ptr++;
  269. }
  270. } catch (ArrayIndexOutOfBoundsException e) {
  271. // Not a valid digit.
  272. }
  273. if (ptrResult != null)
  274. ptrResult.value = ptr;
  275. return sign < 0 ? -r : r;
  276. }
  277. /**
  278. * Parse 4 character base 16 (hex) formatted string to unsigned integer.
  279. * <p>
  280. * The number is read in network byte order, that is, most significant
  281. * nybble first.
  282. *
  283. * @param bs
  284. * buffer to parse digits from; positions {@code [p, p+4)} will
  285. * be parsed.
  286. * @param p
  287. * first position within the buffer to parse.
  288. * @return the integer value.
  289. * @throws java.lang.ArrayIndexOutOfBoundsException
  290. * if the string is not hex formatted.
  291. */
  292. public static final int parseHexInt16(final byte[] bs, final int p) {
  293. int r = digits16[bs[p]] << 4;
  294. r |= digits16[bs[p + 1]];
  295. r <<= 4;
  296. r |= digits16[bs[p + 2]];
  297. r <<= 4;
  298. r |= digits16[bs[p + 3]];
  299. if (r < 0)
  300. throw new ArrayIndexOutOfBoundsException();
  301. return r;
  302. }
  303. /**
  304. * Parse 8 character base 16 (hex) formatted string to unsigned integer.
  305. * <p>
  306. * The number is read in network byte order, that is, most significant
  307. * nybble first.
  308. *
  309. * @param bs
  310. * buffer to parse digits from; positions {@code [p, p+8)} will
  311. * be parsed.
  312. * @param p
  313. * first position within the buffer to parse.
  314. * @return the integer value.
  315. * @throws java.lang.ArrayIndexOutOfBoundsException
  316. * if the string is not hex formatted.
  317. */
  318. public static final int parseHexInt32(final byte[] bs, final int p) {
  319. int r = digits16[bs[p]] << 4;
  320. r |= digits16[bs[p + 1]];
  321. r <<= 4;
  322. r |= digits16[bs[p + 2]];
  323. r <<= 4;
  324. r |= digits16[bs[p + 3]];
  325. r <<= 4;
  326. r |= digits16[bs[p + 4]];
  327. r <<= 4;
  328. r |= digits16[bs[p + 5]];
  329. r <<= 4;
  330. r |= digits16[bs[p + 6]];
  331. final int last = digits16[bs[p + 7]];
  332. if (r < 0 || last < 0)
  333. throw new ArrayIndexOutOfBoundsException();
  334. return (r << 4) | last;
  335. }
  336. /**
  337. * Parse 16 character base 16 (hex) formatted string to unsigned long.
  338. * <p>
  339. * The number is read in network byte order, that is, most significant
  340. * nibble first.
  341. *
  342. * @param bs
  343. * buffer to parse digits from; positions {@code [p, p+16)} will
  344. * be parsed.
  345. * @param p
  346. * first position within the buffer to parse.
  347. * @return the integer value.
  348. * @throws java.lang.ArrayIndexOutOfBoundsException
  349. * if the string is not hex formatted.
  350. * @since 4.3
  351. */
  352. public static final long parseHexInt64(final byte[] bs, final int p) {
  353. long r = digits16[bs[p]] << 4;
  354. r |= digits16[bs[p + 1]];
  355. r <<= 4;
  356. r |= digits16[bs[p + 2]];
  357. r <<= 4;
  358. r |= digits16[bs[p + 3]];
  359. r <<= 4;
  360. r |= digits16[bs[p + 4]];
  361. r <<= 4;
  362. r |= digits16[bs[p + 5]];
  363. r <<= 4;
  364. r |= digits16[bs[p + 6]];
  365. r <<= 4;
  366. r |= digits16[bs[p + 7]];
  367. r <<= 4;
  368. r |= digits16[bs[p + 8]];
  369. r <<= 4;
  370. r |= digits16[bs[p + 9]];
  371. r <<= 4;
  372. r |= digits16[bs[p + 10]];
  373. r <<= 4;
  374. r |= digits16[bs[p + 11]];
  375. r <<= 4;
  376. r |= digits16[bs[p + 12]];
  377. r <<= 4;
  378. r |= digits16[bs[p + 13]];
  379. r <<= 4;
  380. r |= digits16[bs[p + 14]];
  381. final int last = digits16[bs[p + 15]];
  382. if (r < 0 || last < 0)
  383. throw new ArrayIndexOutOfBoundsException();
  384. return (r << 4) | last;
  385. }
  386. /**
  387. * Parse a single hex digit to its numeric value (0-15).
  388. *
  389. * @param digit
  390. * hex character to parse.
  391. * @return numeric value, in the range 0-15.
  392. * @throws java.lang.ArrayIndexOutOfBoundsException
  393. * if the input digit is not a valid hex digit.
  394. */
  395. public static final int parseHexInt4(final byte digit) {
  396. final byte r = digits16[digit];
  397. if (r < 0)
  398. throw new ArrayIndexOutOfBoundsException();
  399. return r;
  400. }
  401. /**
  402. * Parse a Git style timezone string.
  403. * <p>
  404. * The sequence "-0315" will be parsed as the numeric value -195, as the
  405. * lower two positions count minutes, not 100ths of an hour.
  406. *
  407. * @param b
  408. * buffer to scan.
  409. * @param ptr
  410. * position within buffer to start parsing digits at.
  411. * @return the timezone at this location, expressed in minutes.
  412. */
  413. public static final int parseTimeZoneOffset(byte[] b, int ptr) {
  414. return parseTimeZoneOffset(b, ptr, null);
  415. }
  416. /**
  417. * Parse a Git style timezone string.
  418. * <p>
  419. * The sequence "-0315" will be parsed as the numeric value -195, as the
  420. * lower two positions count minutes, not 100ths of an hour.
  421. *
  422. * @param b
  423. * buffer to scan.
  424. * @param ptr
  425. * position within buffer to start parsing digits at.
  426. * @param ptrResult
  427. * optional location to return the new ptr value through. If null
  428. * the ptr value will be discarded.
  429. * @return the timezone at this location, expressed in minutes.
  430. * @since 4.1
  431. */
  432. public static final int parseTimeZoneOffset(final byte[] b, int ptr,
  433. MutableInteger ptrResult) {
  434. final int v = parseBase10(b, ptr, ptrResult);
  435. final int tzMins = v % 100;
  436. final int tzHours = v / 100;
  437. return tzHours * 60 + tzMins;
  438. }
  439. /**
  440. * Locate the first position after a given character.
  441. *
  442. * @param b
  443. * buffer to scan.
  444. * @param ptr
  445. * position within buffer to start looking for chrA at.
  446. * @param chrA
  447. * character to find.
  448. * @return new position just after chrA.
  449. */
  450. public static final int next(byte[] b, int ptr, char chrA) {
  451. final int sz = b.length;
  452. while (ptr < sz) {
  453. if (b[ptr++] == chrA)
  454. return ptr;
  455. }
  456. return ptr;
  457. }
  458. /**
  459. * Locate the first position after the next LF.
  460. * <p>
  461. * This method stops on the first '\n' it finds.
  462. *
  463. * @param b
  464. * buffer to scan.
  465. * @param ptr
  466. * position within buffer to start looking for LF at.
  467. * @return new position just after the first LF found.
  468. */
  469. public static final int nextLF(byte[] b, int ptr) {
  470. return next(b, ptr, '\n');
  471. }
  472. /**
  473. * Locate the first position after either the given character or LF.
  474. * <p>
  475. * This method stops on the first match it finds from either chrA or '\n'.
  476. *
  477. * @param b
  478. * buffer to scan.
  479. * @param ptr
  480. * position within buffer to start looking for chrA or LF at.
  481. * @param chrA
  482. * character to find.
  483. * @return new position just after the first chrA or LF to be found.
  484. */
  485. public static final int nextLF(byte[] b, int ptr, char chrA) {
  486. final int sz = b.length;
  487. while (ptr < sz) {
  488. final byte c = b[ptr++];
  489. if (c == chrA || c == '\n')
  490. return ptr;
  491. }
  492. return ptr;
  493. }
  494. /**
  495. * Locate the end of the header. Note that headers may be
  496. * more than one line long.
  497. * @param b
  498. * buffer to scan.
  499. * @param ptr
  500. * position within buffer to start looking for the end-of-header.
  501. * @return new position just after the header. This is either
  502. * b.length, or the index of the header's terminating newline.
  503. * @since 5.1
  504. */
  505. public static final int headerEnd(final byte[] b, int ptr) {
  506. final int sz = b.length;
  507. while (ptr < sz) {
  508. final byte c = b[ptr++];
  509. if (c == '\n' && (ptr == sz || b[ptr] != ' ')) {
  510. return ptr - 1;
  511. }
  512. }
  513. return ptr - 1;
  514. }
  515. /**
  516. * Find the start of the contents of a given header.
  517. *
  518. * @param b
  519. * buffer to scan.
  520. * @param headerName
  521. * header to search for
  522. * @param ptr
  523. * position within buffer to start looking for header at.
  524. * @return new position at the start of the header's contents, -1 for
  525. * not found
  526. * @since 5.1
  527. */
  528. public static final int headerStart(byte[] headerName, byte[] b, int ptr) {
  529. // Start by advancing to just past a LF or buffer start
  530. if (ptr != 0) {
  531. ptr = nextLF(b, ptr - 1);
  532. }
  533. while (ptr < b.length - (headerName.length + 1)) {
  534. boolean found = true;
  535. for (int i = 0; i < headerName.length; i++) {
  536. if (headerName[i] != b[ptr++]) {
  537. found = false;
  538. break;
  539. }
  540. }
  541. if (found && b[ptr++] == ' ') {
  542. return ptr;
  543. }
  544. ptr = nextLF(b, ptr);
  545. }
  546. return -1;
  547. }
  548. /**
  549. * Locate the first position before a given character.
  550. *
  551. * @param b
  552. * buffer to scan.
  553. * @param ptr
  554. * position within buffer to start looking for chrA at.
  555. * @param chrA
  556. * character to find.
  557. * @return new position just before chrA, -1 for not found
  558. */
  559. public static final int prev(byte[] b, int ptr, char chrA) {
  560. if (ptr == b.length)
  561. --ptr;
  562. while (ptr >= 0) {
  563. if (b[ptr--] == chrA)
  564. return ptr;
  565. }
  566. return ptr;
  567. }
  568. /**
  569. * Locate the first position before the previous LF.
  570. * <p>
  571. * This method stops on the first '\n' it finds.
  572. *
  573. * @param b
  574. * buffer to scan.
  575. * @param ptr
  576. * position within buffer to start looking for LF at.
  577. * @return new position just before the first LF found, -1 for not found
  578. */
  579. public static final int prevLF(byte[] b, int ptr) {
  580. return prev(b, ptr, '\n');
  581. }
  582. /**
  583. * Locate the previous position before either the given character or LF.
  584. * <p>
  585. * This method stops on the first match it finds from either chrA or '\n'.
  586. *
  587. * @param b
  588. * buffer to scan.
  589. * @param ptr
  590. * position within buffer to start looking for chrA or LF at.
  591. * @param chrA
  592. * character to find.
  593. * @return new position just before the first chrA or LF to be found, -1 for
  594. * not found
  595. */
  596. public static final int prevLF(byte[] b, int ptr, char chrA) {
  597. if (ptr == b.length)
  598. --ptr;
  599. while (ptr >= 0) {
  600. final byte c = b[ptr--];
  601. if (c == chrA || c == '\n')
  602. return ptr;
  603. }
  604. return ptr;
  605. }
  606. /**
  607. * Index the region between <code>[ptr, end)</code> to find line starts.
  608. * <p>
  609. * The returned list is 1 indexed. Index 0 contains
  610. * {@link java.lang.Integer#MIN_VALUE} to pad the list out.
  611. * <p>
  612. * Using a 1 indexed list means that line numbers can be directly accessed
  613. * from the list, so <code>list.get(1)</code> (aka get line 1) returns
  614. * <code>ptr</code>.
  615. * <p>
  616. * The last element (index <code>map.size()-1</code>) always contains
  617. * <code>end</code>.
  618. *
  619. * @param buf
  620. * buffer to scan.
  621. * @param ptr
  622. * position within the buffer corresponding to the first byte of
  623. * line 1.
  624. * @param end
  625. * 1 past the end of the content within <code>buf</code>.
  626. * @return a line map indicating the starting position of each line.
  627. */
  628. public static final IntList lineMap(byte[] buf, int ptr, int end) {
  629. IntList map = new IntList((end - ptr) / 36);
  630. map.fillTo(1, Integer.MIN_VALUE);
  631. for (; ptr < end; ptr = nextLF(buf, ptr)) {
  632. map.add(ptr);
  633. }
  634. map.add(end);
  635. return map;
  636. }
  637. /**
  638. * Like {@link #lineMap(byte[], int, int)} but throw
  639. * {@link BinaryBlobException} if a NUL byte is encountered.
  640. *
  641. * @param buf
  642. * buffer to scan.
  643. * @param ptr
  644. * position within the buffer corresponding to the first byte of
  645. * line 1.
  646. * @param end
  647. * 1 past the end of the content within <code>buf</code>.
  648. * @return a line map indicating the starting position of each line.
  649. * @throws BinaryBlobException
  650. * if a NUL byte is found.
  651. * @since 5.0
  652. */
  653. public static final IntList lineMapOrBinary(byte[] buf, int ptr, int end)
  654. throws BinaryBlobException {
  655. IntList map = lineMapOrNull(buf, ptr, end);
  656. if (map == null) {
  657. throw new BinaryBlobException();
  658. }
  659. return map;
  660. }
  661. @Nullable
  662. private static IntList lineMapOrNull(byte[] buf, int ptr, int end) {
  663. // Experimentally derived from multiple source repositories
  664. // the average number of bytes/line is 36. Its a rough guess
  665. // to initially size our map close to the target.
  666. IntList map = new IntList((end - ptr) / 36);
  667. map.add(Integer.MIN_VALUE);
  668. boolean foundLF = true;
  669. for (; ptr < end; ptr++) {
  670. if (foundLF) {
  671. map.add(ptr);
  672. }
  673. if (buf[ptr] == '\0') {
  674. return null;
  675. }
  676. foundLF = (buf[ptr] == '\n');
  677. }
  678. map.add(end);
  679. return map;
  680. }
  681. /**
  682. * Locate the "author " header line data.
  683. *
  684. * @param b
  685. * buffer to scan.
  686. * @param ptr
  687. * position in buffer to start the scan at. Most callers should
  688. * pass 0 to ensure the scan starts from the beginning of the
  689. * commit buffer and does not accidentally look at message body.
  690. * @return position just after the space in "author ", so the first
  691. * character of the author's name. If no author header can be
  692. * located -1 is returned.
  693. */
  694. public static final int author(byte[] b, int ptr) {
  695. final int sz = b.length;
  696. if (ptr == 0)
  697. ptr += 46; // skip the "tree ..." line.
  698. while (ptr < sz && b[ptr] == 'p')
  699. ptr += 48; // skip this parent.
  700. return match(b, ptr, author);
  701. }
  702. /**
  703. * Locate the "committer " header line data.
  704. *
  705. * @param b
  706. * buffer to scan.
  707. * @param ptr
  708. * position in buffer to start the scan at. Most callers should
  709. * pass 0 to ensure the scan starts from the beginning of the
  710. * commit buffer and does not accidentally look at message body.
  711. * @return position just after the space in "committer ", so the first
  712. * character of the committer's name. If no committer header can be
  713. * located -1 is returned.
  714. */
  715. public static final int committer(byte[] b, int ptr) {
  716. final int sz = b.length;
  717. if (ptr == 0)
  718. ptr += 46; // skip the "tree ..." line.
  719. while (ptr < sz && b[ptr] == 'p')
  720. ptr += 48; // skip this parent.
  721. if (ptr < sz && b[ptr] == 'a')
  722. ptr = nextLF(b, ptr);
  723. return match(b, ptr, committer);
  724. }
  725. /**
  726. * Locate the "tagger " header line data.
  727. *
  728. * @param b
  729. * buffer to scan.
  730. * @param ptr
  731. * position in buffer to start the scan at. Most callers should
  732. * pass 0 to ensure the scan starts from the beginning of the tag
  733. * buffer and does not accidentally look at message body.
  734. * @return position just after the space in "tagger ", so the first
  735. * character of the tagger's name. If no tagger header can be
  736. * located -1 is returned.
  737. */
  738. public static final int tagger(byte[] b, int ptr) {
  739. final int sz = b.length;
  740. if (ptr == 0)
  741. ptr += 48; // skip the "object ..." line.
  742. while (ptr < sz) {
  743. if (b[ptr] == '\n')
  744. return -1;
  745. final int m = match(b, ptr, tagger);
  746. if (m >= 0)
  747. return m;
  748. ptr = nextLF(b, ptr);
  749. }
  750. return -1;
  751. }
  752. /**
  753. * Locate the "encoding " header line.
  754. *
  755. * @param b
  756. * buffer to scan.
  757. * @param ptr
  758. * position in buffer to start the scan at. Most callers should
  759. * pass 0 to ensure the scan starts from the beginning of the
  760. * buffer and does not accidentally look at the message body.
  761. * @return position just after the space in "encoding ", so the first
  762. * character of the encoding's name. If no encoding header can be
  763. * located -1 is returned (and UTF-8 should be assumed).
  764. */
  765. public static final int encoding(byte[] b, int ptr) {
  766. final int sz = b.length;
  767. while (ptr < sz) {
  768. if (b[ptr] == '\n')
  769. return -1;
  770. if (b[ptr] == 'e')
  771. break;
  772. ptr = nextLF(b, ptr);
  773. }
  774. return match(b, ptr, encoding);
  775. }
  776. /**
  777. * Parse the "encoding " header as a string.
  778. * <p>
  779. * Locates the "encoding " header (if present) and returns its value.
  780. *
  781. * @param b
  782. * buffer to scan.
  783. * @return the encoding header as specified in the commit; null if the
  784. * header was not present and should be assumed.
  785. * @since 4.2
  786. */
  787. @Nullable
  788. public static String parseEncodingName(byte[] b) {
  789. int enc = encoding(b, 0);
  790. if (enc < 0) {
  791. return null;
  792. }
  793. int lf = nextLF(b, enc);
  794. return decode(UTF_8, b, enc, lf - 1);
  795. }
  796. /**
  797. * Parse the "encoding " header into a character set reference.
  798. * <p>
  799. * Locates the "encoding " header (if present) by first calling
  800. * {@link #encoding(byte[], int)} and then returns the proper character set
  801. * to apply to this buffer to evaluate its contents as character data.
  802. * <p>
  803. * If no encoding header is present {@code UTF-8} is assumed.
  804. *
  805. * @param b
  806. * buffer to scan.
  807. * @return the Java character set representation. Never null.
  808. * @throws IllegalCharsetNameException
  809. * if the character set requested by the encoding header is
  810. * malformed and unsupportable.
  811. * @throws UnsupportedCharsetException
  812. * if the JRE does not support the character set requested by
  813. * the encoding header.
  814. */
  815. public static Charset parseEncoding(byte[] b) {
  816. String enc = parseEncodingName(b);
  817. if (enc == null) {
  818. return UTF_8;
  819. }
  820. String name = enc.trim();
  821. try {
  822. return Charset.forName(name);
  823. } catch (IllegalCharsetNameException
  824. | UnsupportedCharsetException badName) {
  825. Charset aliased = charsetForAlias(name);
  826. if (aliased != null) {
  827. return aliased;
  828. }
  829. throw badName;
  830. }
  831. }
  832. /**
  833. * Parse a name string (e.g. author, committer, tagger) into a PersonIdent.
  834. * <p>
  835. * Leading spaces won't be trimmed from the string, i.e. will show up in the
  836. * parsed name afterwards.
  837. *
  838. * @param in
  839. * the string to parse a name from.
  840. * @return the parsed identity or null in case the identity could not be
  841. * parsed.
  842. */
  843. public static PersonIdent parsePersonIdent(String in) {
  844. return parsePersonIdent(Constants.encode(in), 0);
  845. }
  846. /**
  847. * Parse a name line (e.g. author, committer, tagger) into a PersonIdent.
  848. * <p>
  849. * When passing in a value for <code>nameB</code> callers should use the
  850. * return value of {@link #author(byte[], int)} or
  851. * {@link #committer(byte[], int)}, as these methods provide the proper
  852. * position within the buffer.
  853. *
  854. * @param raw
  855. * the buffer to parse character data from.
  856. * @param nameB
  857. * first position of the identity information. This should be the
  858. * first position after the space which delimits the header field
  859. * name (e.g. "author" or "committer") from the rest of the
  860. * identity line.
  861. * @return the parsed identity or null in case the identity could not be
  862. * parsed.
  863. */
  864. public static PersonIdent parsePersonIdent(byte[] raw, int nameB) {
  865. Charset cs;
  866. try {
  867. cs = parseEncoding(raw);
  868. } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
  869. // Assume UTF-8 for person identities, usually this is correct.
  870. // If not decode() will fall back to the ISO-8859-1 encoding.
  871. cs = UTF_8;
  872. }
  873. final int emailB = nextLF(raw, nameB, '<');
  874. final int emailE = nextLF(raw, emailB, '>');
  875. if (emailB >= raw.length || raw[emailB] == '\n' ||
  876. (emailE >= raw.length - 1 && raw[emailE - 1] != '>'))
  877. return null;
  878. final int nameEnd = emailB - 2 >= nameB && raw[emailB - 2] == ' ' ?
  879. emailB - 2 : emailB - 1;
  880. final String name = decode(cs, raw, nameB, nameEnd);
  881. final String email = decode(cs, raw, emailB, emailE - 1);
  882. // Start searching from end of line, as after first name-email pair,
  883. // another name-email pair may occur. We will ignore all kinds of
  884. // "junk" following the first email.
  885. //
  886. // We've to use (emailE - 1) for the case that raw[email] is LF,
  887. // otherwise we would run too far. "-2" is necessary to position
  888. // before the LF in case of LF termination resp. the penultimate
  889. // character if there is no trailing LF.
  890. final int tzBegin = lastIndexOfTrim(raw, ' ',
  891. nextLF(raw, emailE - 1) - 2) + 1;
  892. if (tzBegin <= emailE) // No time/zone, still valid
  893. return new PersonIdent(name, email, 0, 0);
  894. final int whenBegin = Math.max(emailE,
  895. lastIndexOfTrim(raw, ' ', tzBegin - 1) + 1);
  896. if (whenBegin >= tzBegin - 1) // No time/zone, still valid
  897. return new PersonIdent(name, email, 0, 0);
  898. final long when = parseLongBase10(raw, whenBegin, null);
  899. final int tz = parseTimeZoneOffset(raw, tzBegin);
  900. return new PersonIdent(name, email, when * 1000L, tz);
  901. }
  902. /**
  903. * Parse a name data (e.g. as within a reflog) into a PersonIdent.
  904. * <p>
  905. * When passing in a value for <code>nameB</code> callers should use the
  906. * return value of {@link #author(byte[], int)} or
  907. * {@link #committer(byte[], int)}, as these methods provide the proper
  908. * position within the buffer.
  909. *
  910. * @param raw
  911. * the buffer to parse character data from.
  912. * @param nameB
  913. * first position of the identity information. This should be the
  914. * first position after the space which delimits the header field
  915. * name (e.g. "author" or "committer") from the rest of the
  916. * identity line.
  917. * @return the parsed identity. Never null.
  918. */
  919. public static PersonIdent parsePersonIdentOnly(final byte[] raw,
  920. final int nameB) {
  921. int stop = nextLF(raw, nameB);
  922. int emailB = nextLF(raw, nameB, '<');
  923. int emailE = nextLF(raw, emailB, '>');
  924. final String name;
  925. final String email;
  926. if (emailE < stop) {
  927. email = decode(raw, emailB, emailE - 1);
  928. } else {
  929. email = "invalid"; //$NON-NLS-1$
  930. }
  931. if (emailB < stop)
  932. name = decode(raw, nameB, emailB - 2);
  933. else
  934. name = decode(raw, nameB, stop);
  935. final MutableInteger ptrout = new MutableInteger();
  936. long when;
  937. int tz;
  938. if (emailE < stop) {
  939. when = parseLongBase10(raw, emailE + 1, ptrout);
  940. tz = parseTimeZoneOffset(raw, ptrout.value);
  941. } else {
  942. when = 0;
  943. tz = 0;
  944. }
  945. return new PersonIdent(name, email, when * 1000L, tz);
  946. }
  947. /**
  948. * Locate the end of a footer line key string.
  949. * <p>
  950. * If the region at {@code raw[ptr]} matches {@code ^[A-Za-z0-9-]+:} (e.g.
  951. * "Signed-off-by: A. U. Thor\n") then this method returns the position of
  952. * the first ':'.
  953. * <p>
  954. * If the region at {@code raw[ptr]} does not match {@code ^[A-Za-z0-9-]+:}
  955. * then this method returns -1.
  956. *
  957. * @param raw
  958. * buffer to scan.
  959. * @param ptr
  960. * first position within raw to consider as a footer line key.
  961. * @return position of the ':' which terminates the footer line key if this
  962. * is otherwise a valid footer line key; otherwise -1.
  963. */
  964. public static int endOfFooterLineKey(byte[] raw, int ptr) {
  965. try {
  966. for (;;) {
  967. final byte c = raw[ptr];
  968. if (footerLineKeyChars[c] == 0) {
  969. if (c == ':')
  970. return ptr;
  971. return -1;
  972. }
  973. ptr++;
  974. }
  975. } catch (ArrayIndexOutOfBoundsException e) {
  976. return -1;
  977. }
  978. }
  979. /**
  980. * Decode a buffer under UTF-8, if possible.
  981. *
  982. * If the byte stream cannot be decoded that way, the platform default is tried
  983. * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
  984. *
  985. * @param buffer
  986. * buffer to pull raw bytes from.
  987. * @return a string representation of the range <code>[start,end)</code>,
  988. * after decoding the region through the specified character set.
  989. */
  990. public static String decode(byte[] buffer) {
  991. return decode(buffer, 0, buffer.length);
  992. }
  993. /**
  994. * Decode a buffer under UTF-8, if possible.
  995. *
  996. * If the byte stream cannot be decoded that way, the platform default is
  997. * tried and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
  998. *
  999. * @param buffer
  1000. * buffer to pull raw bytes from.
  1001. * @param start
  1002. * start position in buffer
  1003. * @param end
  1004. * one position past the last location within the buffer to take
  1005. * data from.
  1006. * @return a string representation of the range <code>[start,end)</code>,
  1007. * after decoding the region through the specified character set.
  1008. */
  1009. public static String decode(final byte[] buffer, final int start,
  1010. final int end) {
  1011. return decode(UTF_8, buffer, start, end);
  1012. }
  1013. /**
  1014. * Decode a buffer under the specified character set if possible.
  1015. *
  1016. * If the byte stream cannot be decoded that way, the platform default is tried
  1017. * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
  1018. *
  1019. * @param cs
  1020. * character set to use when decoding the buffer.
  1021. * @param buffer
  1022. * buffer to pull raw bytes from.
  1023. * @return a string representation of the range <code>[start,end)</code>,
  1024. * after decoding the region through the specified character set.
  1025. */
  1026. public static String decode(Charset cs, byte[] buffer) {
  1027. return decode(cs, buffer, 0, buffer.length);
  1028. }
  1029. /**
  1030. * Decode a region of the buffer under the specified character set if possible.
  1031. *
  1032. * If the byte stream cannot be decoded that way, the platform default is tried
  1033. * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
  1034. *
  1035. * @param cs
  1036. * character set to use when decoding the buffer.
  1037. * @param buffer
  1038. * buffer to pull raw bytes from.
  1039. * @param start
  1040. * first position within the buffer to take data from.
  1041. * @param end
  1042. * one position past the last location within the buffer to take
  1043. * data from.
  1044. * @return a string representation of the range <code>[start,end)</code>,
  1045. * after decoding the region through the specified character set.
  1046. */
  1047. public static String decode(final Charset cs, final byte[] buffer,
  1048. final int start, final int end) {
  1049. try {
  1050. return decodeNoFallback(cs, buffer, start, end);
  1051. } catch (CharacterCodingException e) {
  1052. // Fall back to an ISO-8859-1 style encoding. At least all of
  1053. // the bytes will be present in the output.
  1054. //
  1055. return extractBinaryString(buffer, start, end);
  1056. }
  1057. }
  1058. /**
  1059. * Decode a region of the buffer under the specified character set if
  1060. * possible.
  1061. *
  1062. * If the byte stream cannot be decoded that way, the platform default is
  1063. * tried and if that too fails, an exception is thrown.
  1064. *
  1065. * @param cs
  1066. * character set to use when decoding the buffer.
  1067. * @param buffer
  1068. * buffer to pull raw bytes from.
  1069. * @param start
  1070. * first position within the buffer to take data from.
  1071. * @param end
  1072. * one position past the last location within the buffer to take
  1073. * data from.
  1074. * @return a string representation of the range <code>[start,end)</code>,
  1075. * after decoding the region through the specified character set.
  1076. * @throws java.nio.charset.CharacterCodingException
  1077. * the input is not in any of the tested character sets.
  1078. */
  1079. public static String decodeNoFallback(final Charset cs,
  1080. final byte[] buffer, final int start, final int end)
  1081. throws CharacterCodingException {
  1082. ByteBuffer b = ByteBuffer.wrap(buffer, start, end - start);
  1083. b.mark();
  1084. // Try our built-in favorite. The assumption here is that
  1085. // decoding will fail if the data is not actually encoded
  1086. // using that encoder.
  1087. try {
  1088. return decode(b, UTF_8);
  1089. } catch (CharacterCodingException e) {
  1090. b.reset();
  1091. }
  1092. if (!cs.equals(UTF_8)) {
  1093. // Try the suggested encoding, it might be right since it was
  1094. // provided by the caller.
  1095. try {
  1096. return decode(b, cs);
  1097. } catch (CharacterCodingException e) {
  1098. b.reset();
  1099. }
  1100. }
  1101. // Try the default character set. A small group of people
  1102. // might actually use the same (or very similar) locale.
  1103. Charset defcs = Charset.defaultCharset();
  1104. if (!defcs.equals(cs) && !defcs.equals(UTF_8)) {
  1105. try {
  1106. return decode(b, defcs);
  1107. } catch (CharacterCodingException e) {
  1108. b.reset();
  1109. }
  1110. }
  1111. throw new CharacterCodingException();
  1112. }
  1113. /**
  1114. * Decode a region of the buffer under the ISO-8859-1 encoding.
  1115. *
  1116. * Each byte is treated as a single character in the 8859-1 character
  1117. * encoding, performing a raw binary-&gt;char conversion.
  1118. *
  1119. * @param buffer
  1120. * buffer to pull raw bytes from.
  1121. * @param start
  1122. * first position within the buffer to take data from.
  1123. * @param end
  1124. * one position past the last location within the buffer to take
  1125. * data from.
  1126. * @return a string representation of the range <code>[start,end)</code>.
  1127. */
  1128. public static String extractBinaryString(final byte[] buffer,
  1129. final int start, final int end) {
  1130. final StringBuilder r = new StringBuilder(end - start);
  1131. for (int i = start; i < end; i++)
  1132. r.append((char) (buffer[i] & 0xff));
  1133. return r.toString();
  1134. }
  1135. private static String decode(ByteBuffer b, Charset charset)
  1136. throws CharacterCodingException {
  1137. final CharsetDecoder d = charset.newDecoder();
  1138. d.onMalformedInput(CodingErrorAction.REPORT);
  1139. d.onUnmappableCharacter(CodingErrorAction.REPORT);
  1140. return d.decode(b).toString();
  1141. }
  1142. /**
  1143. * Locate the position of the commit message body.
  1144. *
  1145. * @param b
  1146. * buffer to scan.
  1147. * @param ptr
  1148. * position in buffer to start the scan at. Most callers should
  1149. * pass 0 to ensure the scan starts from the beginning of the
  1150. * commit buffer.
  1151. * @return position of the user's message buffer.
  1152. */
  1153. public static final int commitMessage(byte[] b, int ptr) {
  1154. final int sz = b.length;
  1155. if (ptr == 0)
  1156. ptr += 46; // skip the "tree ..." line.
  1157. while (ptr < sz && b[ptr] == 'p')
  1158. ptr += 48; // skip this parent.
  1159. // Skip any remaining header lines, ignoring what their actual
  1160. // header line type is. This is identical to the logic for a tag.
  1161. //
  1162. return tagMessage(b, ptr);
  1163. }
  1164. /**
  1165. * Locate the position of the tag message body.
  1166. *
  1167. * @param b
  1168. * buffer to scan.
  1169. * @param ptr
  1170. * position in buffer to start the scan at. Most callers should
  1171. * pass 0 to ensure the scan starts from the beginning of the tag
  1172. * buffer.
  1173. * @return position of the user's message buffer.
  1174. */
  1175. public static final int tagMessage(byte[] b, int ptr) {
  1176. final int sz = b.length;
  1177. if (ptr == 0)
  1178. ptr += 48; // skip the "object ..." line.
  1179. while (ptr < sz && b[ptr] != '\n')
  1180. ptr = nextLF(b, ptr);
  1181. if (ptr < sz && b[ptr] == '\n')
  1182. return ptr + 1;
  1183. return -1;
  1184. }
  1185. /**
  1186. * Locate the end of a paragraph.
  1187. * <p>
  1188. * A paragraph is ended by two consecutive LF bytes or CRLF pairs
  1189. *
  1190. * @param b
  1191. * buffer to scan.
  1192. * @param start
  1193. * position in buffer to start the scan at. Most callers will
  1194. * want to pass the first position of the commit message (as
  1195. * found by {@link #commitMessage(byte[], int)}.
  1196. * @return position of the LF at the end of the paragraph;
  1197. * <code>b.length</code> if no paragraph end could be located.
  1198. */
  1199. public static final int endOfParagraph(byte[] b, int start) {
  1200. int ptr = start;
  1201. final int sz = b.length;
  1202. while (ptr < sz && (b[ptr] != '\n' && b[ptr] != '\r'))
  1203. ptr = nextLF(b, ptr);
  1204. if (ptr > start && b[ptr - 1] == '\n')
  1205. ptr--;
  1206. if (ptr > start && b[ptr - 1] == '\r')
  1207. ptr--;
  1208. return ptr;
  1209. }
  1210. /**
  1211. * Get last index of {@code ch} in raw, trimming spaces.
  1212. *
  1213. * @param raw
  1214. * buffer to scan.
  1215. * @param ch
  1216. * character to find.
  1217. * @param pos
  1218. * starting position.
  1219. * @return last index of {@code ch} in raw, trimming spaces.
  1220. * @since 4.1
  1221. */
  1222. public static int lastIndexOfTrim(byte[] raw, char ch, int pos) {
  1223. while (pos >= 0 && raw[pos] == ' ')
  1224. pos--;
  1225. while (pos >= 0 && raw[pos] != ch)
  1226. pos--;
  1227. return pos;
  1228. }
  1229. private static Charset charsetForAlias(String name) {
  1230. return encodingAliases.get(StringUtils.toLowerCase(name));
  1231. }
  private RawParseUtils() {
    // Static-only utility class: the constructor is private so that no
    // instances can be created.
  }
  1235. }