You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

RawParseUtils.java 39KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340
  1. /*
  2. * Copyright (C) 2008-2009, Google Inc.
  3. * Copyright (C) 2006-2008, Shawn O. Pearce <spearce@spearce.org>
  4. * and other copyright owners as documented in the project's IP log.
  5. *
  6. * This program and the accompanying materials are made available
  7. * under the terms of the Eclipse Distribution License v1.0 which
  8. * accompanies this distribution, is reproduced below, and is
  9. * available at http://www.eclipse.org/org/documents/edl-v10.php
  10. *
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. *
  20. * - Redistributions in binary form must reproduce the above
  21. * copyright notice, this list of conditions and the following
  22. * disclaimer in the documentation and/or other materials provided
  23. * with the distribution.
  24. *
  25. * - Neither the name of the Eclipse Foundation, Inc. nor the
  26. * names of its contributors may be used to endorse or promote
  27. * products derived from this software without specific prior
  28. * written permission.
  29. *
  30. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. */
  44. package org.eclipse.jgit.util;
  45. import static java.nio.charset.StandardCharsets.ISO_8859_1;
  46. import static java.nio.charset.StandardCharsets.UTF_8;
  47. import static org.eclipse.jgit.lib.ObjectChecker.author;
  48. import static org.eclipse.jgit.lib.ObjectChecker.committer;
  49. import static org.eclipse.jgit.lib.ObjectChecker.encoding;
  50. import static org.eclipse.jgit.lib.ObjectChecker.tagger;
  51. import java.nio.ByteBuffer;
  52. import java.nio.charset.CharacterCodingException;
  53. import java.nio.charset.Charset;
  54. import java.nio.charset.CharsetDecoder;
  55. import java.nio.charset.CodingErrorAction;
  56. import java.nio.charset.IllegalCharsetNameException;
  57. import java.nio.charset.UnsupportedCharsetException;
  58. import java.util.Arrays;
  59. import java.util.HashMap;
  60. import java.util.Map;
  61. import org.eclipse.jgit.annotations.Nullable;
  62. import org.eclipse.jgit.errors.BinaryBlobException;
  63. import org.eclipse.jgit.lib.Constants;
  64. import org.eclipse.jgit.lib.PersonIdent;
  65. /**
  66. * Handy utility functions to parse raw object contents.
  67. */
  68. public final class RawParseUtils {
  /**
   * UTF-8 charset constant.
   *
   * @since 2.2
   * @deprecated use {@link java.nio.charset.StandardCharsets#UTF_8} instead
   */
  @Deprecated
  public static final Charset UTF8_CHARSET = UTF_8;

  /** Indexed by ASCII code: value of a decimal digit, -1 for any other slot. */
  private static final byte[] digits10;

  /**
   * Indexed by ASCII code: value of a hex digit (upper or lower case), -1
   * for any other slot.
   */
  private static final byte[] digits16;

  /**
   * Indexed by ASCII code: 1 for '-', the decimal digits and the ASCII
   * letters, 0 for every other slot up to 'z'.
   */
  private static final byte[] footerLineKeyChars;

  /** Additional charset names mapped onto the charset they stand for. */
  private static final Map<String, Charset> encodingAliases;

  static {
    encodingAliases = new HashMap<>();
    encodingAliases.put("latin-1", ISO_8859_1); //$NON-NLS-1$
    encodingAliases.put("iso-latin-1", ISO_8859_1); //$NON-NLS-1$

    // Both digit tables start out as "not a digit" everywhere.
    digits10 = new byte['9' + 1];
    Arrays.fill(digits10, (byte) -1);
    digits16 = new byte['f' + 1];
    Arrays.fill(digits16, (byte) -1);

    // '0'..'9' carry the same value in base 10 and base 16.
    for (int c = '0'; c <= '9'; c++) {
      final byte digitValue = (byte) (c - '0');
      digits10[c] = digitValue;
      digits16[c] = digitValue;
    }
    // 'a'..'f' and 'A'..'F' map to 10..15.
    for (int n = 0; n < 6; n++) {
      final byte digitValue = (byte) (n + 10);
      digits16['a' + n] = digitValue;
      digits16['A' + n] = digitValue;
    }

    footerLineKeyChars = new byte['z' + 1];
    footerLineKeyChars['-'] = 1;
    for (int c = '0'; c <= '9'; c++) {
      footerLineKeyChars[c] = 1;
    }
    for (int n = 0; n < 26; n++) {
      footerLineKeyChars['A' + n] = 1;
      footerLineKeyChars['a' + n] = 1;
    }
  }
  106. /**
  107. * Determine if b[ptr] matches src.
  108. *
  109. * @param b
  110. * the buffer to scan.
  111. * @param ptr
  112. * first position within b, this should match src[0].
  113. * @param src
  114. * the buffer to test for equality with b.
  115. * @return ptr + src.length if b[ptr..src.length] == src; else -1.
  116. */
  117. public static final int match(byte[] b, int ptr, byte[] src) {
  118. if (ptr + src.length > b.length)
  119. return -1;
  120. for (int i = 0; i < src.length; i++, ptr++)
  121. if (b[ptr] != src[i])
  122. return -1;
  123. return ptr;
  124. }
  125. private static final byte[] base10byte = { '0', '1', '2', '3', '4', '5',
  126. '6', '7', '8', '9' };
  127. /**
  128. * Format a base 10 numeric into a temporary buffer.
  129. * <p>
  130. * Formatting is performed backwards. The method starts at offset
  131. * <code>o-1</code> and ends at <code>o-1-digits</code>, where
  132. * <code>digits</code> is the number of positions necessary to store the
  133. * base 10 value.
  134. * <p>
  135. * The argument and return values from this method make it easy to chain
  136. * writing, for example:
  137. * </p>
  138. *
  139. * <pre>
  140. * final byte[] tmp = new byte[64];
  141. * int ptr = tmp.length;
  142. * tmp[--ptr] = '\n';
  143. * ptr = RawParseUtils.formatBase10(tmp, ptr, 32);
  144. * tmp[--ptr] = ' ';
  145. * ptr = RawParseUtils.formatBase10(tmp, ptr, 18);
  146. * tmp[--ptr] = 0;
  147. * final String str = new String(tmp, ptr, tmp.length - ptr);
  148. * </pre>
  149. *
  150. * @param b
  151. * buffer to write into.
  152. * @param o
  153. * one offset past the location where writing will begin; writing
  154. * proceeds towards lower index values.
  155. * @param value
  156. * the value to store.
  157. * @return the new offset value <code>o</code>. This is the position of
  158. * the last byte written. Additional writing should start at one
  159. * position earlier.
  160. */
  161. public static int formatBase10(final byte[] b, int o, int value) {
  162. if (value == 0) {
  163. b[--o] = '0';
  164. return o;
  165. }
  166. final boolean isneg = value < 0;
  167. if (isneg)
  168. value = -value;
  169. while (value != 0) {
  170. b[--o] = base10byte[value % 10];
  171. value /= 10;
  172. }
  173. if (isneg)
  174. b[--o] = '-';
  175. return o;
  176. }
  177. /**
  178. * Parse a base 10 numeric from a sequence of ASCII digits into an int.
  179. * <p>
  180. * Digit sequences can begin with an optional run of spaces before the
  181. * sequence, and may start with a '+' or a '-' to indicate sign position.
  182. * Any other characters will cause the method to stop and return the current
  183. * result to the caller.
  184. *
  185. * @param b
  186. * buffer to scan.
  187. * @param ptr
  188. * position within buffer to start parsing digits at.
  189. * @param ptrResult
  190. * optional location to return the new ptr value through. If null
  191. * the ptr value will be discarded.
  192. * @return the value at this location; 0 if the location is not a valid
  193. * numeric.
  194. */
  195. public static final int parseBase10(final byte[] b, int ptr,
  196. final MutableInteger ptrResult) {
  197. int r = 0;
  198. int sign = 0;
  199. try {
  200. final int sz = b.length;
  201. while (ptr < sz && b[ptr] == ' ')
  202. ptr++;
  203. if (ptr >= sz)
  204. return 0;
  205. switch (b[ptr]) {
  206. case '-':
  207. sign = -1;
  208. ptr++;
  209. break;
  210. case '+':
  211. ptr++;
  212. break;
  213. }
  214. while (ptr < sz) {
  215. final byte v = digits10[b[ptr]];
  216. if (v < 0)
  217. break;
  218. r = (r * 10) + v;
  219. ptr++;
  220. }
  221. } catch (ArrayIndexOutOfBoundsException e) {
  222. // Not a valid digit.
  223. }
  224. if (ptrResult != null)
  225. ptrResult.value = ptr;
  226. return sign < 0 ? -r : r;
  227. }
  228. /**
  229. * Parse a base 10 numeric from a sequence of ASCII digits into a long.
  230. * <p>
  231. * Digit sequences can begin with an optional run of spaces before the
  232. * sequence, and may start with a '+' or a '-' to indicate sign position.
  233. * Any other characters will cause the method to stop and return the current
  234. * result to the caller.
  235. *
  236. * @param b
  237. * buffer to scan.
  238. * @param ptr
  239. * position within buffer to start parsing digits at.
  240. * @param ptrResult
  241. * optional location to return the new ptr value through. If null
  242. * the ptr value will be discarded.
  243. * @return the value at this location; 0 if the location is not a valid
  244. * numeric.
  245. */
  246. public static final long parseLongBase10(final byte[] b, int ptr,
  247. final MutableInteger ptrResult) {
  248. long r = 0;
  249. int sign = 0;
  250. try {
  251. final int sz = b.length;
  252. while (ptr < sz && b[ptr] == ' ')
  253. ptr++;
  254. if (ptr >= sz)
  255. return 0;
  256. switch (b[ptr]) {
  257. case '-':
  258. sign = -1;
  259. ptr++;
  260. break;
  261. case '+':
  262. ptr++;
  263. break;
  264. }
  265. while (ptr < sz) {
  266. final byte v = digits10[b[ptr]];
  267. if (v < 0)
  268. break;
  269. r = (r * 10) + v;
  270. ptr++;
  271. }
  272. } catch (ArrayIndexOutOfBoundsException e) {
  273. // Not a valid digit.
  274. }
  275. if (ptrResult != null)
  276. ptrResult.value = ptr;
  277. return sign < 0 ? -r : r;
  278. }
  279. /**
  280. * Parse 4 character base 16 (hex) formatted string to unsigned integer.
  281. * <p>
  282. * The number is read in network byte order, that is, most significant
  283. * nybble first.
  284. *
  285. * @param bs
  286. * buffer to parse digits from; positions {@code [p, p+4)} will
  287. * be parsed.
  288. * @param p
  289. * first position within the buffer to parse.
  290. * @return the integer value.
  291. * @throws java.lang.ArrayIndexOutOfBoundsException
  292. * if the string is not hex formatted.
  293. */
  294. public static final int parseHexInt16(final byte[] bs, final int p) {
  295. int r = digits16[bs[p]] << 4;
  296. r |= digits16[bs[p + 1]];
  297. r <<= 4;
  298. r |= digits16[bs[p + 2]];
  299. r <<= 4;
  300. r |= digits16[bs[p + 3]];
  301. if (r < 0)
  302. throw new ArrayIndexOutOfBoundsException();
  303. return r;
  304. }
  305. /**
  306. * Parse 8 character base 16 (hex) formatted string to unsigned integer.
  307. * <p>
  308. * The number is read in network byte order, that is, most significant
  309. * nybble first.
  310. *
  311. * @param bs
  312. * buffer to parse digits from; positions {@code [p, p+8)} will
  313. * be parsed.
  314. * @param p
  315. * first position within the buffer to parse.
  316. * @return the integer value.
  317. * @throws java.lang.ArrayIndexOutOfBoundsException
  318. * if the string is not hex formatted.
  319. */
  320. public static final int parseHexInt32(final byte[] bs, final int p) {
  321. int r = digits16[bs[p]] << 4;
  322. r |= digits16[bs[p + 1]];
  323. r <<= 4;
  324. r |= digits16[bs[p + 2]];
  325. r <<= 4;
  326. r |= digits16[bs[p + 3]];
  327. r <<= 4;
  328. r |= digits16[bs[p + 4]];
  329. r <<= 4;
  330. r |= digits16[bs[p + 5]];
  331. r <<= 4;
  332. r |= digits16[bs[p + 6]];
  333. final int last = digits16[bs[p + 7]];
  334. if (r < 0 || last < 0)
  335. throw new ArrayIndexOutOfBoundsException();
  336. return (r << 4) | last;
  337. }
  338. /**
  339. * Parse 16 character base 16 (hex) formatted string to unsigned long.
  340. * <p>
  341. * The number is read in network byte order, that is, most significant
  342. * nibble first.
  343. *
  344. * @param bs
  345. * buffer to parse digits from; positions {@code [p, p+16)} will
  346. * be parsed.
  347. * @param p
  348. * first position within the buffer to parse.
  349. * @return the integer value.
  350. * @throws java.lang.ArrayIndexOutOfBoundsException
  351. * if the string is not hex formatted.
  352. * @since 4.3
  353. */
  354. public static final long parseHexInt64(final byte[] bs, final int p) {
  355. long r = digits16[bs[p]] << 4;
  356. r |= digits16[bs[p + 1]];
  357. r <<= 4;
  358. r |= digits16[bs[p + 2]];
  359. r <<= 4;
  360. r |= digits16[bs[p + 3]];
  361. r <<= 4;
  362. r |= digits16[bs[p + 4]];
  363. r <<= 4;
  364. r |= digits16[bs[p + 5]];
  365. r <<= 4;
  366. r |= digits16[bs[p + 6]];
  367. r <<= 4;
  368. r |= digits16[bs[p + 7]];
  369. r <<= 4;
  370. r |= digits16[bs[p + 8]];
  371. r <<= 4;
  372. r |= digits16[bs[p + 9]];
  373. r <<= 4;
  374. r |= digits16[bs[p + 10]];
  375. r <<= 4;
  376. r |= digits16[bs[p + 11]];
  377. r <<= 4;
  378. r |= digits16[bs[p + 12]];
  379. r <<= 4;
  380. r |= digits16[bs[p + 13]];
  381. r <<= 4;
  382. r |= digits16[bs[p + 14]];
  383. final int last = digits16[bs[p + 15]];
  384. if (r < 0 || last < 0)
  385. throw new ArrayIndexOutOfBoundsException();
  386. return (r << 4) | last;
  387. }
  388. /**
  389. * Parse a single hex digit to its numeric value (0-15).
  390. *
  391. * @param digit
  392. * hex character to parse.
  393. * @return numeric value, in the range 0-15.
  394. * @throws java.lang.ArrayIndexOutOfBoundsException
  395. * if the input digit is not a valid hex digit.
  396. */
  397. public static final int parseHexInt4(final byte digit) {
  398. final byte r = digits16[digit];
  399. if (r < 0)
  400. throw new ArrayIndexOutOfBoundsException();
  401. return r;
  402. }
  403. /**
  404. * Parse a Git style timezone string.
  405. * <p>
  406. * The sequence "-0315" will be parsed as the numeric value -195, as the
  407. * lower two positions count minutes, not 100ths of an hour.
  408. *
  409. * @param b
  410. * buffer to scan.
  411. * @param ptr
  412. * position within buffer to start parsing digits at.
  413. * @return the timezone at this location, expressed in minutes.
  414. */
  415. public static final int parseTimeZoneOffset(byte[] b, int ptr) {
  416. return parseTimeZoneOffset(b, ptr, null);
  417. }
  418. /**
  419. * Parse a Git style timezone string.
  420. * <p>
  421. * The sequence "-0315" will be parsed as the numeric value -195, as the
  422. * lower two positions count minutes, not 100ths of an hour.
  423. *
  424. * @param b
  425. * buffer to scan.
  426. * @param ptr
  427. * position within buffer to start parsing digits at.
  428. * @param ptrResult
  429. * optional location to return the new ptr value through. If null
  430. * the ptr value will be discarded.
  431. * @return the timezone at this location, expressed in minutes.
  432. * @since 4.1
  433. */
  434. public static final int parseTimeZoneOffset(final byte[] b, int ptr,
  435. MutableInteger ptrResult) {
  436. final int v = parseBase10(b, ptr, ptrResult);
  437. final int tzMins = v % 100;
  438. final int tzHours = v / 100;
  439. return tzHours * 60 + tzMins;
  440. }
  441. /**
  442. * Locate the first position after a given character.
  443. *
  444. * @param b
  445. * buffer to scan.
  446. * @param ptr
  447. * position within buffer to start looking for chrA at.
  448. * @param chrA
  449. * character to find.
  450. * @return new position just after chrA.
  451. */
  452. public static final int next(byte[] b, int ptr, char chrA) {
  453. final int sz = b.length;
  454. while (ptr < sz) {
  455. if (b[ptr++] == chrA)
  456. return ptr;
  457. }
  458. return ptr;
  459. }
  460. /**
  461. * Locate the first position after the next LF.
  462. * <p>
  463. * This method stops on the first '\n' it finds.
  464. *
  465. * @param b
  466. * buffer to scan.
  467. * @param ptr
  468. * position within buffer to start looking for LF at.
  469. * @return new position just after the first LF found.
  470. */
  471. public static final int nextLF(byte[] b, int ptr) {
  472. return next(b, ptr, '\n');
  473. }
  474. /**
  475. * Locate the first position after either the given character or LF.
  476. * <p>
  477. * This method stops on the first match it finds from either chrA or '\n'.
  478. *
  479. * @param b
  480. * buffer to scan.
  481. * @param ptr
  482. * position within buffer to start looking for chrA or LF at.
  483. * @param chrA
  484. * character to find.
  485. * @return new position just after the first chrA or LF to be found.
  486. */
  487. public static final int nextLF(byte[] b, int ptr, char chrA) {
  488. final int sz = b.length;
  489. while (ptr < sz) {
  490. final byte c = b[ptr++];
  491. if (c == chrA || c == '\n')
  492. return ptr;
  493. }
  494. return ptr;
  495. }
  496. /**
  497. * Locate the end of the header. Note that headers may be
  498. * more than one line long.
  499. * @param b
  500. * buffer to scan.
  501. * @param ptr
  502. * position within buffer to start looking for the end-of-header.
  503. * @return new position just after the header. This is either
  504. * b.length, or the index of the header's terminating newline.
  505. * @since 5.1
  506. */
  507. public static final int headerEnd(final byte[] b, int ptr) {
  508. final int sz = b.length;
  509. while (ptr < sz) {
  510. final byte c = b[ptr++];
  511. if (c == '\n' && (ptr == sz || b[ptr] != ' ')) {
  512. return ptr - 1;
  513. }
  514. }
  515. return ptr - 1;
  516. }
  517. /**
  518. * Find the start of the contents of a given header.
  519. *
  520. * @param b
  521. * buffer to scan.
  522. * @param headerName
  523. * header to search for
  524. * @param ptr
  525. * position within buffer to start looking for header at.
  526. * @return new position at the start of the header's contents, -1 for
  527. * not found
  528. * @since 5.1
  529. */
  530. public static final int headerStart(byte[] headerName, byte[] b, int ptr) {
  531. // Start by advancing to just past a LF or buffer start
  532. if (ptr != 0) {
  533. ptr = nextLF(b, ptr - 1);
  534. }
  535. while (ptr < b.length - (headerName.length + 1)) {
  536. boolean found = true;
  537. for (int i = 0; i < headerName.length; i++) {
  538. if (headerName[i] != b[ptr++]) {
  539. found = false;
  540. break;
  541. }
  542. }
  543. if (found && b[ptr++] == ' ') {
  544. return ptr;
  545. }
  546. ptr = nextLF(b, ptr);
  547. }
  548. return -1;
  549. }
  550. /**
  551. * Locate the first position before a given character.
  552. *
  553. * @param b
  554. * buffer to scan.
  555. * @param ptr
  556. * position within buffer to start looking for chrA at.
  557. * @param chrA
  558. * character to find.
  559. * @return new position just before chrA, -1 for not found
  560. */
  561. public static final int prev(byte[] b, int ptr, char chrA) {
  562. if (ptr == b.length)
  563. --ptr;
  564. while (ptr >= 0) {
  565. if (b[ptr--] == chrA)
  566. return ptr;
  567. }
  568. return ptr;
  569. }
  570. /**
  571. * Locate the first position before the previous LF.
  572. * <p>
  573. * This method stops on the first '\n' it finds.
  574. *
  575. * @param b
  576. * buffer to scan.
  577. * @param ptr
  578. * position within buffer to start looking for LF at.
  579. * @return new position just before the first LF found, -1 for not found
  580. */
  581. public static final int prevLF(byte[] b, int ptr) {
  582. return prev(b, ptr, '\n');
  583. }
  584. /**
  585. * Locate the previous position before either the given character or LF.
  586. * <p>
  587. * This method stops on the first match it finds from either chrA or '\n'.
  588. *
  589. * @param b
  590. * buffer to scan.
  591. * @param ptr
  592. * position within buffer to start looking for chrA or LF at.
  593. * @param chrA
  594. * character to find.
  595. * @return new position just before the first chrA or LF to be found, -1 for
  596. * not found
  597. */
  598. public static final int prevLF(byte[] b, int ptr, char chrA) {
  599. if (ptr == b.length)
  600. --ptr;
  601. while (ptr >= 0) {
  602. final byte c = b[ptr--];
  603. if (c == chrA || c == '\n')
  604. return ptr;
  605. }
  606. return ptr;
  607. }
  608. /**
  609. * Index the region between <code>[ptr, end)</code> to find line starts.
  610. * <p>
  611. * The returned list is 1 indexed. Index 0 contains
  612. * {@link java.lang.Integer#MIN_VALUE} to pad the list out.
  613. * <p>
  614. * Using a 1 indexed list means that line numbers can be directly accessed
  615. * from the list, so <code>list.get(1)</code> (aka get line 1) returns
  616. * <code>ptr</code>.
  617. * <p>
  618. * The last element (index <code>map.size()-1</code>) always contains
  619. * <code>end</code>.
  620. *
  621. * @param buf
  622. * buffer to scan.
  623. * @param ptr
  624. * position within the buffer corresponding to the first byte of
  625. * line 1.
  626. * @param end
  627. * 1 past the end of the content within <code>buf</code>.
  628. * @return a line map indicating the starting position of each line.
  629. */
  630. public static final IntList lineMap(byte[] buf, int ptr, int end) {
  631. IntList map = new IntList((end - ptr) / 36);
  632. map.fillTo(1, Integer.MIN_VALUE);
  633. for (; ptr < end; ptr = nextLF(buf, ptr)) {
  634. map.add(ptr);
  635. }
  636. map.add(end);
  637. return map;
  638. }
  639. /**
  640. * Like {@link #lineMap(byte[], int, int)} but throw
  641. * {@link BinaryBlobException} if a NUL byte is encountered.
  642. *
  643. * @param buf
  644. * buffer to scan.
  645. * @param ptr
  646. * position within the buffer corresponding to the first byte of
  647. * line 1.
  648. * @param end
  649. * 1 past the end of the content within <code>buf</code>.
  650. * @return a line map indicating the starting position of each line.
  651. * @throws BinaryBlobException
  652. * if a NUL byte is found.
  653. * @since 5.0
  654. */
  655. public static final IntList lineMapOrBinary(byte[] buf, int ptr, int end)
  656. throws BinaryBlobException {
  657. IntList map = lineMapOrNull(buf, ptr, end);
  658. if (map == null) {
  659. throw new BinaryBlobException();
  660. }
  661. return map;
  662. }
  663. @Nullable
  664. private static IntList lineMapOrNull(byte[] buf, int ptr, int end) {
  665. // Experimentally derived from multiple source repositories
  666. // the average number of bytes/line is 36. Its a rough guess
  667. // to initially size our map close to the target.
  668. IntList map = new IntList((end - ptr) / 36);
  669. map.add(Integer.MIN_VALUE);
  670. boolean foundLF = true;
  671. for (; ptr < end; ptr++) {
  672. if (foundLF) {
  673. map.add(ptr);
  674. }
  675. if (buf[ptr] == '\0') {
  676. return null;
  677. }
  678. foundLF = (buf[ptr] == '\n');
  679. }
  680. map.add(end);
  681. return map;
  682. }
  683. /**
  684. * Locate the "author " header line data.
  685. *
  686. * @param b
  687. * buffer to scan.
  688. * @param ptr
  689. * position in buffer to start the scan at. Most callers should
  690. * pass 0 to ensure the scan starts from the beginning of the
  691. * commit buffer and does not accidentally look at message body.
  692. * @return position just after the space in "author ", so the first
  693. * character of the author's name. If no author header can be
  694. * located -1 is returned.
  695. */
  696. public static final int author(byte[] b, int ptr) {
  697. final int sz = b.length;
  698. if (ptr == 0)
  699. ptr += 46; // skip the "tree ..." line.
  700. while (ptr < sz && b[ptr] == 'p')
  701. ptr += 48; // skip this parent.
  702. return match(b, ptr, author);
  703. }
  704. /**
  705. * Locate the "committer " header line data.
  706. *
  707. * @param b
  708. * buffer to scan.
  709. * @param ptr
  710. * position in buffer to start the scan at. Most callers should
  711. * pass 0 to ensure the scan starts from the beginning of the
  712. * commit buffer and does not accidentally look at message body.
  713. * @return position just after the space in "committer ", so the first
  714. * character of the committer's name. If no committer header can be
  715. * located -1 is returned.
  716. */
  717. public static final int committer(byte[] b, int ptr) {
  718. final int sz = b.length;
  719. if (ptr == 0)
  720. ptr += 46; // skip the "tree ..." line.
  721. while (ptr < sz && b[ptr] == 'p')
  722. ptr += 48; // skip this parent.
  723. if (ptr < sz && b[ptr] == 'a')
  724. ptr = nextLF(b, ptr);
  725. return match(b, ptr, committer);
  726. }
  727. /**
  728. * Locate the "tagger " header line data.
  729. *
  730. * @param b
  731. * buffer to scan.
  732. * @param ptr
  733. * position in buffer to start the scan at. Most callers should
  734. * pass 0 to ensure the scan starts from the beginning of the tag
  735. * buffer and does not accidentally look at message body.
  736. * @return position just after the space in "tagger ", so the first
  737. * character of the tagger's name. If no tagger header can be
  738. * located -1 is returned.
  739. */
  740. public static final int tagger(byte[] b, int ptr) {
  741. final int sz = b.length;
  742. if (ptr == 0)
  743. ptr += 48; // skip the "object ..." line.
  744. while (ptr < sz) {
  745. if (b[ptr] == '\n')
  746. return -1;
  747. final int m = match(b, ptr, tagger);
  748. if (m >= 0)
  749. return m;
  750. ptr = nextLF(b, ptr);
  751. }
  752. return -1;
  753. }
  754. /**
  755. * Locate the "encoding " header line.
  756. *
  757. * @param b
  758. * buffer to scan.
  759. * @param ptr
  760. * position in buffer to start the scan at. Most callers should
  761. * pass 0 to ensure the scan starts from the beginning of the
  762. * buffer and does not accidentally look at the message body.
  763. * @return position just after the space in "encoding ", so the first
  764. * character of the encoding's name. If no encoding header can be
  765. * located -1 is returned (and UTF-8 should be assumed).
  766. */
  767. public static final int encoding(byte[] b, int ptr) {
  768. final int sz = b.length;
  769. while (ptr < sz) {
  770. if (b[ptr] == '\n')
  771. return -1;
  772. if (b[ptr] == 'e')
  773. break;
  774. ptr = nextLF(b, ptr);
  775. }
  776. return match(b, ptr, encoding);
  777. }
  778. /**
  779. * Parse the "encoding " header as a string.
  780. * <p>
  781. * Locates the "encoding " header (if present) and returns its value.
  782. *
  783. * @param b
  784. * buffer to scan.
  785. * @return the encoding header as specified in the commit; null if the
  786. * header was not present and should be assumed.
  787. * @since 4.2
  788. */
  789. @Nullable
  790. public static String parseEncodingName(byte[] b) {
  791. int enc = encoding(b, 0);
  792. if (enc < 0) {
  793. return null;
  794. }
  795. int lf = nextLF(b, enc);
  796. return decode(UTF_8, b, enc, lf - 1);
  797. }
  798. /**
  799. * Parse the "encoding " header into a character set reference.
  800. * <p>
  801. * Locates the "encoding " header (if present) by first calling
  802. * {@link #encoding(byte[], int)} and then returns the proper character set
  803. * to apply to this buffer to evaluate its contents as character data.
  804. * <p>
  805. * If no encoding header is present {@code UTF-8} is assumed.
  806. *
  807. * @param b
  808. * buffer to scan.
  809. * @return the Java character set representation. Never null.
  810. * @throws IllegalCharsetNameException
  811. * if the character set requested by the encoding header is
  812. * malformed and unsupportable.
  813. * @throws UnsupportedCharsetException
  814. * if the JRE does not support the character set requested by
  815. * the encoding header.
  816. */
  817. public static Charset parseEncoding(byte[] b) {
  818. String enc = parseEncodingName(b);
  819. if (enc == null) {
  820. return UTF_8;
  821. }
  822. String name = enc.trim();
  823. try {
  824. return Charset.forName(name);
  825. } catch (IllegalCharsetNameException
  826. | UnsupportedCharsetException badName) {
  827. Charset aliased = charsetForAlias(name);
  828. if (aliased != null) {
  829. return aliased;
  830. }
  831. throw badName;
  832. }
  833. }
  834. /**
  835. * Parse a name string (e.g. author, committer, tagger) into a PersonIdent.
  836. * <p>
  837. * Leading spaces won't be trimmed from the string, i.e. will show up in the
  838. * parsed name afterwards.
  839. *
  840. * @param in
  841. * the string to parse a name from.
  842. * @return the parsed identity or null in case the identity could not be
  843. * parsed.
  844. */
  845. public static PersonIdent parsePersonIdent(String in) {
  846. return parsePersonIdent(Constants.encode(in), 0);
  847. }
  848. /**
  849. * Parse a name line (e.g. author, committer, tagger) into a PersonIdent.
  850. * <p>
  851. * When passing in a value for <code>nameB</code> callers should use the
  852. * return value of {@link #author(byte[], int)} or
  853. * {@link #committer(byte[], int)}, as these methods provide the proper
  854. * position within the buffer.
  855. *
  856. * @param raw
  857. * the buffer to parse character data from.
  858. * @param nameB
  859. * first position of the identity information. This should be the
  860. * first position after the space which delimits the header field
  861. * name (e.g. "author" or "committer") from the rest of the
  862. * identity line.
  863. * @return the parsed identity or null in case the identity could not be
  864. * parsed.
  865. */
  866. public static PersonIdent parsePersonIdent(byte[] raw, int nameB) {
  867. Charset cs;
  868. try {
  869. cs = parseEncoding(raw);
  870. } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
  871. // Assume UTF-8 for person identities, usually this is correct.
  872. // If not decode() will fall back to the ISO-8859-1 encoding.
  873. cs = UTF_8;
  874. }
  875. final int emailB = nextLF(raw, nameB, '<');
  876. final int emailE = nextLF(raw, emailB, '>');
  877. if (emailB >= raw.length || raw[emailB] == '\n' ||
  878. (emailE >= raw.length - 1 && raw[emailE - 1] != '>'))
  879. return null;
  880. final int nameEnd = emailB - 2 >= nameB && raw[emailB - 2] == ' ' ?
  881. emailB - 2 : emailB - 1;
  882. final String name = decode(cs, raw, nameB, nameEnd);
  883. final String email = decode(cs, raw, emailB, emailE - 1);
  884. // Start searching from end of line, as after first name-email pair,
  885. // another name-email pair may occur. We will ignore all kinds of
  886. // "junk" following the first email.
  887. //
  888. // We've to use (emailE - 1) for the case that raw[email] is LF,
  889. // otherwise we would run too far. "-2" is necessary to position
  890. // before the LF in case of LF termination resp. the penultimate
  891. // character if there is no trailing LF.
  892. final int tzBegin = lastIndexOfTrim(raw, ' ',
  893. nextLF(raw, emailE - 1) - 2) + 1;
  894. if (tzBegin <= emailE) // No time/zone, still valid
  895. return new PersonIdent(name, email, 0, 0);
  896. final int whenBegin = Math.max(emailE,
  897. lastIndexOfTrim(raw, ' ', tzBegin - 1) + 1);
  898. if (whenBegin >= tzBegin - 1) // No time/zone, still valid
  899. return new PersonIdent(name, email, 0, 0);
  900. final long when = parseLongBase10(raw, whenBegin, null);
  901. final int tz = parseTimeZoneOffset(raw, tzBegin);
  902. return new PersonIdent(name, email, when * 1000L, tz);
  903. }
  904. /**
  905. * Parse a name data (e.g. as within a reflog) into a PersonIdent.
  906. * <p>
  907. * When passing in a value for <code>nameB</code> callers should use the
  908. * return value of {@link #author(byte[], int)} or
  909. * {@link #committer(byte[], int)}, as these methods provide the proper
  910. * position within the buffer.
  911. *
  912. * @param raw
  913. * the buffer to parse character data from.
  914. * @param nameB
  915. * first position of the identity information. This should be the
  916. * first position after the space which delimits the header field
  917. * name (e.g. "author" or "committer") from the rest of the
  918. * identity line.
  919. * @return the parsed identity. Never null.
  920. */
  921. public static PersonIdent parsePersonIdentOnly(final byte[] raw,
  922. final int nameB) {
  923. int stop = nextLF(raw, nameB);
  924. int emailB = nextLF(raw, nameB, '<');
  925. int emailE = nextLF(raw, emailB, '>');
  926. final String name;
  927. final String email;
  928. if (emailE < stop) {
  929. email = decode(raw, emailB, emailE - 1);
  930. } else {
  931. email = "invalid"; //$NON-NLS-1$
  932. }
  933. if (emailB < stop)
  934. name = decode(raw, nameB, emailB - 2);
  935. else
  936. name = decode(raw, nameB, stop);
  937. final MutableInteger ptrout = new MutableInteger();
  938. long when;
  939. int tz;
  940. if (emailE < stop) {
  941. when = parseLongBase10(raw, emailE + 1, ptrout);
  942. tz = parseTimeZoneOffset(raw, ptrout.value);
  943. } else {
  944. when = 0;
  945. tz = 0;
  946. }
  947. return new PersonIdent(name, email, when * 1000L, tz);
  948. }
  949. /**
  950. * Locate the end of a footer line key string.
  951. * <p>
  952. * If the region at {@code raw[ptr]} matches {@code ^[A-Za-z0-9-]+:} (e.g.
  953. * "Signed-off-by: A. U. Thor\n") then this method returns the position of
  954. * the first ':'.
  955. * <p>
  956. * If the region at {@code raw[ptr]} does not match {@code ^[A-Za-z0-9-]+:}
  957. * then this method returns -1.
  958. *
  959. * @param raw
  960. * buffer to scan.
  961. * @param ptr
  962. * first position within raw to consider as a footer line key.
  963. * @return position of the ':' which terminates the footer line key if this
  964. * is otherwise a valid footer line key; otherwise -1.
  965. */
  966. public static int endOfFooterLineKey(byte[] raw, int ptr) {
  967. try {
  968. for (;;) {
  969. final byte c = raw[ptr];
  970. if (footerLineKeyChars[c] == 0) {
  971. if (c == ':')
  972. return ptr;
  973. return -1;
  974. }
  975. ptr++;
  976. }
  977. } catch (ArrayIndexOutOfBoundsException e) {
  978. return -1;
  979. }
  980. }
  981. /**
  982. * Decode a buffer under UTF-8, if possible.
  983. *
  984. * If the byte stream cannot be decoded that way, the platform default is tried
  985. * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
  986. *
  987. * @param buffer
  988. * buffer to pull raw bytes from.
  989. * @return a string representation of the range <code>[start,end)</code>,
  990. * after decoding the region through the specified character set.
  991. */
  992. public static String decode(byte[] buffer) {
  993. return decode(buffer, 0, buffer.length);
  994. }
  995. /**
  996. * Decode a buffer under UTF-8, if possible.
  997. *
  998. * If the byte stream cannot be decoded that way, the platform default is
  999. * tried and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
  1000. *
  1001. * @param buffer
  1002. * buffer to pull raw bytes from.
  1003. * @param start
  1004. * start position in buffer
  1005. * @param end
  1006. * one position past the last location within the buffer to take
  1007. * data from.
  1008. * @return a string representation of the range <code>[start,end)</code>,
  1009. * after decoding the region through the specified character set.
  1010. */
  1011. public static String decode(final byte[] buffer, final int start,
  1012. final int end) {
  1013. return decode(UTF_8, buffer, start, end);
  1014. }
  1015. /**
  1016. * Decode a buffer under the specified character set if possible.
  1017. *
  1018. * If the byte stream cannot be decoded that way, the platform default is tried
  1019. * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
  1020. *
  1021. * @param cs
  1022. * character set to use when decoding the buffer.
  1023. * @param buffer
  1024. * buffer to pull raw bytes from.
  1025. * @return a string representation of the range <code>[start,end)</code>,
  1026. * after decoding the region through the specified character set.
  1027. */
  1028. public static String decode(Charset cs, byte[] buffer) {
  1029. return decode(cs, buffer, 0, buffer.length);
  1030. }
  1031. /**
  1032. * Decode a region of the buffer under the specified character set if possible.
  1033. *
  1034. * If the byte stream cannot be decoded that way, the platform default is tried
  1035. * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
  1036. *
  1037. * @param cs
  1038. * character set to use when decoding the buffer.
  1039. * @param buffer
  1040. * buffer to pull raw bytes from.
  1041. * @param start
  1042. * first position within the buffer to take data from.
  1043. * @param end
  1044. * one position past the last location within the buffer to take
  1045. * data from.
  1046. * @return a string representation of the range <code>[start,end)</code>,
  1047. * after decoding the region through the specified character set.
  1048. */
  1049. public static String decode(final Charset cs, final byte[] buffer,
  1050. final int start, final int end) {
  1051. try {
  1052. return decodeNoFallback(cs, buffer, start, end);
  1053. } catch (CharacterCodingException e) {
  1054. // Fall back to an ISO-8859-1 style encoding. At least all of
  1055. // the bytes will be present in the output.
  1056. //
  1057. return extractBinaryString(buffer, start, end);
  1058. }
  1059. }
  1060. /**
  1061. * Decode a region of the buffer under the specified character set if
  1062. * possible.
  1063. *
  1064. * If the byte stream cannot be decoded that way, the platform default is
  1065. * tried and if that too fails, an exception is thrown.
  1066. *
  1067. * @param cs
  1068. * character set to use when decoding the buffer.
  1069. * @param buffer
  1070. * buffer to pull raw bytes from.
  1071. * @param start
  1072. * first position within the buffer to take data from.
  1073. * @param end
  1074. * one position past the last location within the buffer to take
  1075. * data from.
  1076. * @return a string representation of the range <code>[start,end)</code>,
  1077. * after decoding the region through the specified character set.
  1078. * @throws java.nio.charset.CharacterCodingException
  1079. * the input is not in any of the tested character sets.
  1080. */
  1081. public static String decodeNoFallback(final Charset cs,
  1082. final byte[] buffer, final int start, final int end)
  1083. throws CharacterCodingException {
  1084. ByteBuffer b = ByteBuffer.wrap(buffer, start, end - start);
  1085. b.mark();
  1086. // Try our built-in favorite. The assumption here is that
  1087. // decoding will fail if the data is not actually encoded
  1088. // using that encoder.
  1089. try {
  1090. return decode(b, UTF_8);
  1091. } catch (CharacterCodingException e) {
  1092. b.reset();
  1093. }
  1094. if (!cs.equals(UTF_8)) {
  1095. // Try the suggested encoding, it might be right since it was
  1096. // provided by the caller.
  1097. try {
  1098. return decode(b, cs);
  1099. } catch (CharacterCodingException e) {
  1100. b.reset();
  1101. }
  1102. }
  1103. // Try the default character set. A small group of people
  1104. // might actually use the same (or very similar) locale.
  1105. Charset defcs = Charset.defaultCharset();
  1106. if (!defcs.equals(cs) && !defcs.equals(UTF_8)) {
  1107. try {
  1108. return decode(b, defcs);
  1109. } catch (CharacterCodingException e) {
  1110. b.reset();
  1111. }
  1112. }
  1113. throw new CharacterCodingException();
  1114. }
  1115. /**
  1116. * Decode a region of the buffer under the ISO-8859-1 encoding.
  1117. *
  1118. * Each byte is treated as a single character in the 8859-1 character
  1119. * encoding, performing a raw binary-&gt;char conversion.
  1120. *
  1121. * @param buffer
  1122. * buffer to pull raw bytes from.
  1123. * @param start
  1124. * first position within the buffer to take data from.
  1125. * @param end
  1126. * one position past the last location within the buffer to take
  1127. * data from.
  1128. * @return a string representation of the range <code>[start,end)</code>.
  1129. */
  1130. public static String extractBinaryString(final byte[] buffer,
  1131. final int start, final int end) {
  1132. final StringBuilder r = new StringBuilder(end - start);
  1133. for (int i = start; i < end; i++)
  1134. r.append((char) (buffer[i] & 0xff));
  1135. return r.toString();
  1136. }
  1137. private static String decode(ByteBuffer b, Charset charset)
  1138. throws CharacterCodingException {
  1139. final CharsetDecoder d = charset.newDecoder();
  1140. d.onMalformedInput(CodingErrorAction.REPORT);
  1141. d.onUnmappableCharacter(CodingErrorAction.REPORT);
  1142. return d.decode(b).toString();
  1143. }
  1144. /**
  1145. * Locate the position of the commit message body.
  1146. *
  1147. * @param b
  1148. * buffer to scan.
  1149. * @param ptr
  1150. * position in buffer to start the scan at. Most callers should
  1151. * pass 0 to ensure the scan starts from the beginning of the
  1152. * commit buffer.
  1153. * @return position of the user's message buffer.
  1154. */
  1155. public static final int commitMessage(byte[] b, int ptr) {
  1156. final int sz = b.length;
  1157. if (ptr == 0)
  1158. ptr += 46; // skip the "tree ..." line.
  1159. while (ptr < sz && b[ptr] == 'p')
  1160. ptr += 48; // skip this parent.
  1161. // Skip any remaining header lines, ignoring what their actual
  1162. // header line type is. This is identical to the logic for a tag.
  1163. //
  1164. return tagMessage(b, ptr);
  1165. }
  1166. /**
  1167. * Locate the position of the tag message body.
  1168. *
  1169. * @param b
  1170. * buffer to scan.
  1171. * @param ptr
  1172. * position in buffer to start the scan at. Most callers should
  1173. * pass 0 to ensure the scan starts from the beginning of the tag
  1174. * buffer.
  1175. * @return position of the user's message buffer.
  1176. */
  1177. public static final int tagMessage(byte[] b, int ptr) {
  1178. final int sz = b.length;
  1179. if (ptr == 0)
  1180. ptr += 48; // skip the "object ..." line.
  1181. while (ptr < sz && b[ptr] != '\n')
  1182. ptr = nextLF(b, ptr);
  1183. if (ptr < sz && b[ptr] == '\n')
  1184. return ptr + 1;
  1185. return -1;
  1186. }
  1187. /**
  1188. * Locate the end of a paragraph.
  1189. * <p>
  1190. * A paragraph is ended by two consecutive LF bytes or CRLF pairs
  1191. *
  1192. * @param b
  1193. * buffer to scan.
  1194. * @param start
  1195. * position in buffer to start the scan at. Most callers will
  1196. * want to pass the first position of the commit message (as
  1197. * found by {@link #commitMessage(byte[], int)}.
  1198. * @return position of the LF at the end of the paragraph;
  1199. * <code>b.length</code> if no paragraph end could be located.
  1200. */
  1201. public static final int endOfParagraph(byte[] b, int start) {
  1202. int ptr = start;
  1203. final int sz = b.length;
  1204. while (ptr < sz && (b[ptr] != '\n' && b[ptr] != '\r'))
  1205. ptr = nextLF(b, ptr);
  1206. if (ptr > start && b[ptr - 1] == '\n')
  1207. ptr--;
  1208. if (ptr > start && b[ptr - 1] == '\r')
  1209. ptr--;
  1210. return ptr;
  1211. }
  1212. /**
  1213. * Get last index of {@code ch} in raw, trimming spaces.
  1214. *
  1215. * @param raw
  1216. * buffer to scan.
  1217. * @param ch
  1218. * character to find.
  1219. * @param pos
  1220. * starting position.
  1221. * @return last index of {@code ch} in raw, trimming spaces.
  1222. * @since 4.1
  1223. */
  1224. public static int lastIndexOfTrim(byte[] raw, char ch, int pos) {
  1225. while (pos >= 0 && raw[pos] == ' ')
  1226. pos--;
  1227. while (pos >= 0 && raw[pos] != ch)
  1228. pos--;
  1229. return pos;
  1230. }
  1231. private static Charset charsetForAlias(String name) {
  1232. return encodingAliases.get(StringUtils.toLowerCase(name));
  1233. }
  private RawParseUtils() {
    // Static-only utility class: the private constructor keeps it from
    // being instantiated.
  }
  1237. }