You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

GlyphSequence.java 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.complexscripts.util;
  19. import java.nio.IntBuffer;
  20. import java.util.ArrayList;
  21. import java.util.Collections;
  22. import java.util.List;
  23. // CSOFF: LineLengthCheck
  24. /**
  25. * <p>A GlyphSequence encapsulates a sequence of character codes, a sequence of glyph codes,
  26. * and a sequence of character associations, where, for each glyph in the sequence of glyph
  27. * codes, there is a corresponding character association. Character associations serve to
  28. * relate the glyph codes in a glyph sequence to the specific characters in an original
  29. * character code sequence with which the glyph codes are associated.</p>
  30. *
  31. * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
  32. */
  33. public class GlyphSequence implements Cloneable {
  34. /** default character buffer capacity in case new character buffer is created */
  35. private static final int DEFAULT_CHARS_CAPACITY = 8;
  36. /** character buffer */
  37. private IntBuffer characters;
  38. /** glyph buffer */
  39. private IntBuffer glyphs;
  40. /** association list */
  41. private List associations;
  42. /** predications flag */
  43. private boolean predications;
  44. protected GlyphSequence unprocessedGS;
  45. /**
  46. * Instantiate a glyph sequence, reusing (i.e., not copying) the referenced
  47. * character and glyph buffers and associations. If characters is null, then
  48. * an empty character buffer is created. If glyphs is null, then a glyph buffer
  49. * is created whose capacity is that of the character buffer. If associations is
  50. * null, then identity associations are created.
  51. * @param characters a (possibly null) buffer of associated (originating) characters
  52. * @param glyphs a (possibly null) buffer of glyphs
  53. * @param associations a (possibly null) array of glyph to character associations
  54. * @param predications true if predications are enabled
  55. */
  56. public GlyphSequence(IntBuffer characters, IntBuffer glyphs, List associations, boolean predications) {
  57. if (characters == null) {
  58. characters = IntBuffer.allocate(DEFAULT_CHARS_CAPACITY);
  59. }
  60. if (glyphs == null) {
  61. glyphs = IntBuffer.allocate(characters.capacity());
  62. }
  63. if (associations == null) {
  64. associations = makeIdentityAssociations(characters.limit(), glyphs.limit());
  65. }
  66. this.characters = characters;
  67. this.glyphs = glyphs;
  68. this.associations = associations;
  69. this.predications = predications;
  70. unprocessedGS = this;
  71. }
  72. /**
  73. * Instantiate a glyph sequence, reusing (i.e., not copying) the referenced
  74. * character and glyph buffers and associations. If characters is null, then
  75. * an empty character buffer is created. If glyphs is null, then a glyph buffer
  76. * is created whose capacity is that of the character buffer. If associations is
  77. * null, then identity associations are created.
  78. * @param characters a (possibly null) buffer of associated (originating) characters
  79. * @param glyphs a (possibly null) buffer of glyphs
  80. * @param associations a (possibly null) array of glyph to character associations
  81. */
  82. public GlyphSequence(IntBuffer characters, IntBuffer glyphs, List associations) {
  83. this (characters, glyphs, associations, false);
  84. }
  85. /**
  86. * Instantiate a glyph sequence using an existing glyph sequence, where the new glyph sequence shares
  87. * the character array of the existing sequence (but not the buffer object), and creates new copies
  88. * of glyphs buffer and association list.
  89. * @param gs an existing glyph sequence
  90. */
  91. public GlyphSequence(GlyphSequence gs) {
  92. this (gs.characters.duplicate(), copyBuffer(gs.glyphs), copyAssociations(gs.associations), gs.predications);
  93. this.unprocessedGS = gs.unprocessedGS;
  94. }
  95. /**
  96. * Instantiate a glyph sequence using an existing glyph sequence, where the new glyph sequence shares
  97. * the character array of the existing sequence (but not the buffer object), but uses the specified
  98. * backtrack, input, and lookahead glyph arrays to populate the glyphs, and uses the specified
  99. * of glyphs buffer and association list.
  100. * backtrack, input, and lookahead association arrays to populate the associations.
  101. * @param gs an existing glyph sequence
  102. * @param bga backtrack glyph array
  103. * @param iga input glyph array
  104. * @param lga lookahead glyph array
  105. * @param bal backtrack association list
  106. * @param ial input association list
  107. * @param lal lookahead association list
  108. */
  109. public GlyphSequence(GlyphSequence gs, int[] bga, int[] iga, int[] lga, CharAssociation[] bal, CharAssociation[] ial, CharAssociation[] lal) {
  110. this (gs.characters.duplicate(), concatGlyphs(bga, iga, lga), concatAssociations(bal, ial, lal), gs.predications);
  111. }
  112. /**
  113. * Obtain reference to underlying character buffer.
  114. * @return character buffer reference
  115. */
  116. public IntBuffer getCharacters() {
  117. return characters;
  118. }
  119. /**
  120. * Obtain array of characters. If <code>copy</code> is true, then
  121. * a newly instantiated array is returned, otherwise a reference to
  122. * the underlying buffer's array is returned. N.B. in case a reference
  123. * to the undelying buffer's array is returned, the length
  124. * of the array is not necessarily the number of characters in array.
  125. * To determine the number of characters, use {@link #getCharacterCount}.
  126. * @param copy true if to return a newly instantiated array of characters
  127. * @return array of characters
  128. */
  129. public int[] getCharacterArray(boolean copy) {
  130. if (copy) {
  131. return toArray(characters);
  132. } else {
  133. return characters.array();
  134. }
  135. }
  136. /**
  137. * Obtain the number of characters in character array, where
  138. * each character constitutes a unicode scalar value.
  139. * NB: Supplementary characters (non-BMP code points) count as 1
  140. * character, not as two UTF-16 code units.
  141. * @return number of characters available in character array
  142. */
  143. public int getCharacterCount() {
  144. return characters.limit();
  145. }
  146. /**
  147. * Obtain the number of characters in character array, where
  148. * each character constitutes a UTF-16 character. This means
  149. * that every non-BMP character is counted as 2 characters.
  150. * @return number of chars (UTF-16 code units) available in
  151. * character array
  152. */
  153. public int getUTF16CharacterCount() {
  154. int count = 0;
  155. for (int ch : characters.array()) {
  156. count += Character.charCount(ch);
  157. }
  158. return count;
  159. }
  160. /**
  161. * Obtain glyph id at specified index.
  162. * @param index to obtain glyph
  163. * @return the glyph identifier of glyph at specified index
  164. * @throws IndexOutOfBoundsException if index is less than zero
  165. * or exceeds last valid position
  166. */
  167. public int getGlyph(int index) throws IndexOutOfBoundsException {
  168. return glyphs.get(index);
  169. }
  170. public int getUnprocessedGlyph(int index) throws IndexOutOfBoundsException {
  171. return unprocessedGS.getGlyph(index);
  172. }
  173. public void setUnprocessedGS(GlyphSequence glyphSequence) {
  174. unprocessedGS = glyphSequence;
  175. }
  176. /**
  177. * Set glyph id at specified index.
  178. * @param index to set glyph
  179. * @param gi glyph index
  180. * @throws IndexOutOfBoundsException if index is greater or equal to
  181. * the limit of the underlying glyph buffer
  182. */
  183. public void setGlyph(int index, int gi) throws IndexOutOfBoundsException {
  184. if (gi > 65535) {
  185. gi = 65535;
  186. }
  187. glyphs.put(index, gi);
  188. }
  189. /**
  190. * Obtain reference to underlying glyph buffer.
  191. * @return glyph buffer reference
  192. */
  193. public IntBuffer getGlyphs() {
  194. return glyphs;
  195. }
  196. /**
  197. * Obtain count glyphs starting at offset. If <code>count</code> is
  198. * negative, then it is treated as if the number of available glyphs
  199. * were specified.
  200. * @param offset into glyph sequence
  201. * @param count of glyphs to obtain starting at offset, or negative,
  202. * indicating all avaialble glyphs starting at offset
  203. * @return glyph array
  204. */
  205. public int[] getGlyphs(int offset, int count) {
  206. int ng = getGlyphCount();
  207. if (offset < 0) {
  208. offset = 0;
  209. } else if (offset > ng) {
  210. offset = ng;
  211. }
  212. if (count < 0) {
  213. count = ng - offset;
  214. }
  215. int[] ga = new int [ count ];
  216. for (int i = offset, n = offset + count, k = 0; i < n; i++) {
  217. if (k < ga.length) {
  218. ga [ k++ ] = glyphs.get(i);
  219. }
  220. }
  221. return ga;
  222. }
  223. /**
  224. * Obtain array of glyphs. If <code>copy</code> is true, then
  225. * a newly instantiated array is returned, otherwise a reference to
  226. * the underlying buffer's array is returned. N.B. in case a reference
  227. * to the undelying buffer's array is returned, the length
  228. * of the array is not necessarily the number of glyphs in array.
  229. * To determine the number of glyphs, use {@link #getGlyphCount}.
  230. * @param copy true if to return a newly instantiated array of glyphs
  231. * @return array of glyphs
  232. */
  233. public int[] getGlyphArray(boolean copy) {
  234. if (copy) {
  235. return toArray(glyphs);
  236. } else {
  237. return glyphs.array();
  238. }
  239. }
  240. /**
  241. * Obtain the number of glyphs in glyphs array, where
  242. * each glyph constitutes a font specific glyph index.
  243. * @return number of glyphs available in character array
  244. */
  245. public int getGlyphCount() {
  246. return glyphs.limit();
  247. }
  248. /**
  249. * Obtain association at specified index.
  250. * @param index into associations array
  251. * @return glyph to character associations at specified index
  252. * @throws IndexOutOfBoundsException if index is less than zero
  253. * or exceeds last valid position
  254. */
  255. public CharAssociation getAssociation(int index) throws IndexOutOfBoundsException {
  256. return (CharAssociation) associations.get(index);
  257. }
  258. /**
  259. * Obtain reference to underlying associations list.
  260. * @return associations list
  261. */
  262. public List getAssociations() {
  263. return associations;
  264. }
  265. /**
  266. * Obtain count associations starting at offset.
  267. * @param offset into glyph sequence
  268. * @param count of associations to obtain starting at offset, or negative,
  269. * indicating all avaialble associations starting at offset
  270. * @return associations
  271. */
  272. public CharAssociation[] getAssociations(int offset, int count) {
  273. int ng = getGlyphCount();
  274. if (offset < 0) {
  275. offset = 0;
  276. } else if (offset > ng) {
  277. offset = ng;
  278. }
  279. if (count < 0) {
  280. count = ng - offset;
  281. }
  282. CharAssociation[] aa = new CharAssociation [ count ];
  283. for (int i = offset, n = offset + count, k = 0; i < n; i++) {
  284. if (k < aa.length) {
  285. aa [ k++ ] = (CharAssociation) associations.get(i);
  286. }
  287. }
  288. return aa;
  289. }
  290. /**
  291. * Enable or disable predications.
  292. * @param enable true if predications are to be enabled; otherwise false to disable
  293. */
  294. public void setPredications(boolean enable) {
  295. this.predications = enable;
  296. }
  297. /**
  298. * Obtain predications state.
  299. * @return true if predications are enabled
  300. */
  301. public boolean getPredications() {
  302. return this.predications;
  303. }
  304. /**
  305. * Set predication &lt;KEY,VALUE&gt; at glyph sequence OFFSET.
  306. * @param offset offset (index) into glyph sequence
  307. * @param key predication key
  308. * @param value predication value
  309. */
  310. public void setPredication(int offset, String key, Object value) {
  311. if (predications) {
  312. CharAssociation[] aa = getAssociations(offset, 1);
  313. CharAssociation ca = aa[0];
  314. ca.setPredication(key, value);
  315. }
  316. }
  317. /**
  318. * Get predication KEY at glyph sequence OFFSET.
  319. * @param offset offset (index) into glyph sequence
  320. * @param key predication key
  321. * @return predication KEY at OFFSET or null if none exists
  322. */
  323. public Object getPredication(int offset, String key) {
  324. if (predications) {
  325. CharAssociation[] aa = getAssociations(offset, 1);
  326. CharAssociation ca = aa[0];
  327. return ca.getPredication(key);
  328. } else {
  329. return null;
  330. }
  331. }
  332. /**
  333. * Compare glyphs.
  334. * @param gb buffer containing glyph indices with which this glyph sequence's glyphs are to be compared
  335. * @return zero if glyphs are the same, otherwise returns 1 or -1 according to whether this glyph sequence's
  336. * glyphs are lexicographically greater or lesser than the glyphs in the specified string buffer
  337. */
  338. public int compareGlyphs(IntBuffer gb) {
  339. int ng = getGlyphCount();
  340. for (int i = 0, n = gb.limit(); i < n; i++) {
  341. if (i < ng) {
  342. int g1 = glyphs.get(i);
  343. int g2 = gb.get(i);
  344. if (g1 > g2) {
  345. return 1;
  346. } else if (g1 < g2) {
  347. return -1;
  348. }
  349. } else {
  350. return -1; // this gb is a proper prefix of specified gb
  351. }
  352. }
  353. return 0; // same lengths with no difference
  354. }
  355. /** {@inheritDoc} */
  356. public Object clone() {
  357. try {
  358. GlyphSequence gs = (GlyphSequence) super.clone();
  359. gs.characters = copyBuffer(characters);
  360. gs.glyphs = copyBuffer(glyphs);
  361. gs.associations = copyAssociations(associations);
  362. return gs;
  363. } catch (CloneNotSupportedException e) {
  364. return null;
  365. }
  366. }
  367. /** {@inheritDoc} */
  368. public String toString() {
  369. StringBuffer sb = new StringBuffer();
  370. sb.append('{');
  371. sb.append("chars = [");
  372. sb.append(characters);
  373. sb.append("], glyphs = [");
  374. sb.append(glyphs);
  375. sb.append("], associations = [");
  376. sb.append(associations);
  377. sb.append("]");
  378. sb.append('}');
  379. return sb.toString();
  380. }
  381. /**
  382. * Determine if two arrays of glyphs are identical.
  383. * @param ga1 first glyph array
  384. * @param ga2 second glyph array
  385. * @return true if arrays are botth null or both non-null and have identical elements
  386. */
  387. public static boolean sameGlyphs(int[] ga1, int[] ga2) {
  388. if (ga1 == ga2) {
  389. return true;
  390. } else if ((ga1 == null) || (ga2 == null)) {
  391. return false;
  392. } else if (ga1.length != ga2.length) {
  393. return false;
  394. } else {
  395. for (int i = 0, n = ga1.length; i < n; i++) {
  396. if (ga1[i] != ga2[i]) {
  397. return false;
  398. }
  399. }
  400. return true;
  401. }
  402. }
  403. /**
  404. * Concatenante glyph arrays.
  405. * @param bga backtrack glyph array
  406. * @param iga input glyph array
  407. * @param lga lookahead glyph array
  408. * @return new integer buffer containing concatenated glyphs
  409. */
  410. public static IntBuffer concatGlyphs(int[] bga, int[] iga, int[] lga) {
  411. int ng = 0;
  412. if (bga != null) {
  413. ng += bga.length;
  414. }
  415. if (iga != null) {
  416. ng += iga.length;
  417. }
  418. if (lga != null) {
  419. ng += lga.length;
  420. }
  421. IntBuffer gb = IntBuffer.allocate(ng);
  422. if (bga != null) {
  423. gb.put(bga);
  424. }
  425. if (iga != null) {
  426. gb.put(iga);
  427. }
  428. if (lga != null) {
  429. gb.put(lga);
  430. }
  431. gb.flip();
  432. return gb;
  433. }
  434. /**
  435. * Concatenante association arrays.
  436. * @param baa backtrack association array
  437. * @param iaa input association array
  438. * @param laa lookahead association array
  439. * @return new list containing concatenated associations
  440. */
  441. public static List concatAssociations(CharAssociation[] baa, CharAssociation[] iaa, CharAssociation[] laa) {
  442. int na = 0;
  443. if (baa != null) {
  444. na += baa.length;
  445. }
  446. if (iaa != null) {
  447. na += iaa.length;
  448. }
  449. if (laa != null) {
  450. na += laa.length;
  451. }
  452. if (na > 0) {
  453. List gl = new ArrayList(na);
  454. if (baa != null) {
  455. Collections.addAll(gl, baa);
  456. }
  457. if (iaa != null) {
  458. Collections.addAll(gl, iaa);
  459. }
  460. if (laa != null) {
  461. Collections.addAll(gl, laa);
  462. }
  463. return gl;
  464. } else {
  465. return null;
  466. }
  467. }
  468. /**
  469. * Join (concatenate) glyph sequences.
  470. * @param gs original glyph sequence from which to reuse character array reference
  471. * @param sa array of glyph sequences, whose glyph arrays and association lists are to be concatenated
  472. * @return new glyph sequence referring to character array of GS and concatenated glyphs and associations of SA
  473. */
  474. public static GlyphSequence join(GlyphSequence gs, GlyphSequence[] sa) {
  475. assert sa != null;
  476. int tg = 0;
  477. int ta = 0;
  478. for (GlyphSequence s : sa) {
  479. IntBuffer ga = s.getGlyphs();
  480. assert ga != null;
  481. int ng = ga.limit();
  482. List al = s.getAssociations();
  483. assert al != null;
  484. int na = al.size();
  485. assert na == ng;
  486. tg += ng;
  487. ta += na;
  488. }
  489. IntBuffer uga = IntBuffer.allocate(tg);
  490. ArrayList ual = new ArrayList(ta);
  491. for (GlyphSequence s : sa) {
  492. uga.put(s.getGlyphs());
  493. ual.addAll(s.getAssociations());
  494. }
  495. return new GlyphSequence(gs.getCharacters(), uga, ual, gs.getPredications());
  496. }
  497. /**
  498. * Reorder sequence such that [SOURCE,SOURCE+COUNT) is moved just prior to TARGET.
  499. * @param gs input sequence
  500. * @param source index of sub-sequence to reorder
  501. * @param count length of sub-sequence to reorder
  502. * @param target index to which source sub-sequence is to be moved
  503. * @return reordered sequence (or original if no reordering performed)
  504. */
  505. public static GlyphSequence reorder(GlyphSequence gs, int source, int count, int target) {
  506. if (source != target) {
  507. int ng = gs.getGlyphCount();
  508. int[] ga = gs.getGlyphArray(false);
  509. int[] nga = new int [ ng ];
  510. CharAssociation[] aa = gs.getAssociations(0, ng);
  511. CharAssociation[] naa = new CharAssociation [ ng ];
  512. if (source < target) {
  513. int t = 0;
  514. for (int s = 0, e = source; s < e; s++, t++) {
  515. nga[t] = ga[s];
  516. naa[t] = aa[s];
  517. }
  518. for (int s = source + count, e = target; s < e; s++, t++) {
  519. nga[t] = ga[s];
  520. naa[t] = aa[s];
  521. }
  522. for (int s = source, e = source + count; s < e; s++, t++) {
  523. nga[t] = ga[s];
  524. naa[t] = aa[s];
  525. }
  526. for (int s = target, e = ng; s < e; s++, t++) {
  527. nga[t] = ga[s];
  528. naa[t] = aa[s];
  529. }
  530. } else {
  531. int t = 0;
  532. for (int s = 0, e = target; s < e; s++, t++) {
  533. nga[t] = ga[s];
  534. naa[t] = aa[s];
  535. }
  536. for (int s = source, e = source + count; s < e; s++, t++) {
  537. nga[t] = ga[s];
  538. naa[t] = aa[s];
  539. }
  540. for (int s = target, e = source; s < e; s++, t++) {
  541. nga[t] = ga[s];
  542. naa[t] = aa[s];
  543. }
  544. for (int s = source + count, e = ng; s < e; s++, t++) {
  545. nga[t] = ga[s];
  546. naa[t] = aa[s];
  547. }
  548. }
  549. return new GlyphSequence(gs, null, nga, null, null, naa, null);
  550. } else {
  551. return gs;
  552. }
  553. }
  554. private static int[] toArray(IntBuffer ib) {
  555. if (ib != null) {
  556. int n = ib.limit();
  557. int[] ia = new int[n];
  558. ib.get(ia, 0, n);
  559. return ia;
  560. } else {
  561. return new int[0];
  562. }
  563. }
  564. private static List makeIdentityAssociations(int numChars, int numGlyphs) {
  565. int nc = numChars;
  566. int ng = numGlyphs;
  567. List av = new ArrayList(ng);
  568. for (int i = 0, n = ng; i < n; i++) {
  569. int k = (i > nc) ? nc : i;
  570. av.add(new CharAssociation(i, (k == nc) ? 0 : 1));
  571. }
  572. return av;
  573. }
  574. private static IntBuffer copyBuffer(IntBuffer ib) {
  575. if (ib != null) {
  576. int[] ia = new int [ ib.capacity() ];
  577. int p = ib.position();
  578. int l = ib.limit();
  579. System.arraycopy(ib.array(), 0, ia, 0, ia.length);
  580. return IntBuffer.wrap(ia, p, l - p);
  581. } else {
  582. return null;
  583. }
  584. }
  585. private static List copyAssociations(List ca) {
  586. if (ca != null) {
  587. return new ArrayList(ca);
  588. } else {
  589. return ca;
  590. }
  591. }
  592. }