You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

GlyphSequence.java 37KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.complexscripts.util;
  19. import java.nio.IntBuffer;
  20. import java.util.ArrayList;
  21. import java.util.HashMap;
  22. import java.util.List;
  23. import java.util.Map;
  24. // CSOFF: LineLengthCheck
  25. /**
  26. * <p>A GlyphSequence encapsulates a sequence of character codes, a sequence of glyph codes,
  27. * and a sequence of character associations, where, for each glyph in the sequence of glyph
  28. * codes, there is a corresponding character association. Character associations serve to
  29. * relate the glyph codes in a glyph sequence to the specific characters in an original
  30. * character code sequence with which the glyph codes are associated.</p>
  31. *
  32. * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
  33. */
  34. public class GlyphSequence implements Cloneable {
  /** Default character buffer capacity, used when a new character buffer has to be created. */
  private static final int DEFAULT_CHARS_CAPACITY = 8;
  /** Character buffer (the originating characters). */
  private IntBuffer characters;
  /** Glyph buffer; the glyph count reported by this class is this buffer's limit. */
  private IntBuffer glyphs;
  /** Association list (raw type); elements are read back as CharAssociation instances. */
  private List associations;
  /** Predications flag; while false, setPredication does nothing and getPredication returns null. */
  private boolean predications;
  45. /**
  46. * Instantiate a glyph sequence, reusing (i.e., not copying) the referenced
  47. * character and glyph buffers and associations. If characters is null, then
  48. * an empty character buffer is created. If glyphs is null, then a glyph buffer
  49. * is created whose capacity is that of the character buffer. If associations is
  50. * null, then identity associations are created.
  51. * @param characters a (possibly null) buffer of associated (originating) characters
  52. * @param glyphs a (possibly null) buffer of glyphs
  53. * @param associations a (possibly null) array of glyph to character associations
  54. * @param predications true if predications are enabled
  55. */
  56. public GlyphSequence(IntBuffer characters, IntBuffer glyphs, List associations, boolean predications) {
  57. if (characters == null) {
  58. characters = IntBuffer.allocate(DEFAULT_CHARS_CAPACITY);
  59. }
  60. if (glyphs == null) {
  61. glyphs = IntBuffer.allocate(characters.capacity());
  62. }
  63. if (associations == null) {
  64. associations = makeIdentityAssociations(characters.limit(), glyphs.limit());
  65. }
  66. this.characters = characters;
  67. this.glyphs = glyphs;
  68. this.associations = associations;
  69. this.predications = predications;
  70. }
  /**
   * Create a glyph sequence with predications disabled. Equivalent to calling the
   * four-argument constructor with <code>predications</code> set to false; the supplied
   * buffers and list are reused (not copied) and null arguments are defaulted there.
   * @param characters a (possibly null) buffer of associated (originating) characters
   * @param glyphs a (possibly null) buffer of glyphs
   * @param associations a (possibly null) list of glyph to character associations
   */
  public GlyphSequence(IntBuffer characters, IntBuffer glyphs, List associations) {
      this(characters, glyphs, associations, false);
  }
  /**
   * Create a glyph sequence from an existing one. The character buffer is duplicated
   * (a new buffer object over the same content), while the glyph buffer and the
   * association list are copied. The association list copy is a new list holding the
   * same association objects. The predications flag is carried over.
   * @param gs an existing glyph sequence
   */
  public GlyphSequence(GlyphSequence gs) {
      this(
          gs.characters.duplicate(),
          copyBuffer(gs.glyphs),
          copyAssociations(gs.associations),
          gs.predications);
  }
  /**
   * Create a glyph sequence from an existing one, sharing its character content (through
   * a duplicate of its character buffer) but taking glyphs and associations from the
   * specified arrays. The glyphs are the concatenation of the backtrack, input and
   * lookahead glyph arrays, and the associations are the concatenation of the backtrack,
   * input and lookahead association arrays. The predications flag is carried over.
   * @param gs an existing glyph sequence
   * @param bga backtrack glyph array
   * @param iga input glyph array
   * @param lga lookahead glyph array
   * @param bal backtrack association list
   * @param ial input association list
   * @param lal lookahead association list
   */
  public GlyphSequence(GlyphSequence gs, int[] bga, int[] iga, int[] lga, CharAssociation[] bal, CharAssociation[] ial, CharAssociation[] lal) {
      this(
          gs.characters.duplicate(),
          concatGlyphs(bga, iga, lga),
          concatAssociations(bal, ial, lal),
          gs.predications);
  }
  110. /**
  111. * Obtain reference to underlying character buffer.
  112. * @return character buffer reference
  113. */
  114. public IntBuffer getCharacters() {
  115. return characters;
  116. }
  117. /**
  118. * Obtain array of characters. If <code>copy</code> is true, then
  119. * a newly instantiated array is returned, otherwise a reference to
  120. * the underlying buffer's array is returned. N.B. in case a reference
  121. * to the undelying buffer's array is returned, the length
  122. * of the array is not necessarily the number of characters in array.
  123. * To determine the number of characters, use {@link #getCharacterCount}.
  124. * @param copy true if to return a newly instantiated array of characters
  125. * @return array of characters
  126. */
  127. public int[] getCharacterArray(boolean copy) {
  128. if (copy) {
  129. return toArray(characters);
  130. } else {
  131. return characters.array();
  132. }
  133. }
  134. /**
  135. * Obtain the number of characters in character array, where
  136. * each character constitutes a unicode scalar value.
  137. * @return number of characters available in character array
  138. */
  139. public int getCharacterCount() {
  140. return characters.limit();
  141. }
  142. /**
  143. * Obtain glyph id at specified index.
  144. * @param index to obtain glyph
  145. * @return the glyph identifier of glyph at specified index
  146. * @throws IndexOutOfBoundsException if index is less than zero
  147. * or exceeds last valid position
  148. */
  149. public int getGlyph(int index) throws IndexOutOfBoundsException {
  150. return glyphs.get(index);
  151. }
  152. /**
  153. * Set glyph id at specified index.
  154. * @param index to set glyph
  155. * @param gi glyph index
  156. * @throws IndexOutOfBoundsException if index is greater or equal to
  157. * the limit of the underlying glyph buffer
  158. */
  159. public void setGlyph(int index, int gi) throws IndexOutOfBoundsException {
  160. if (gi > 65535) {
  161. gi = 65535;
  162. }
  163. glyphs.put(index, gi);
  164. }
  165. /**
  166. * Obtain reference to underlying glyph buffer.
  167. * @return glyph buffer reference
  168. */
  169. public IntBuffer getGlyphs() {
  170. return glyphs;
  171. }
  172. /**
  173. * Obtain count glyphs starting at offset. If <code>count</code> is
  174. * negative, then it is treated as if the number of available glyphs
  175. * were specified.
  176. * @param offset into glyph sequence
  177. * @param count of glyphs to obtain starting at offset, or negative,
  178. * indicating all avaialble glyphs starting at offset
  179. * @return glyph array
  180. */
  181. public int[] getGlyphs(int offset, int count) {
  182. int ng = getGlyphCount();
  183. if (offset < 0) {
  184. offset = 0;
  185. } else if (offset > ng) {
  186. offset = ng;
  187. }
  188. if (count < 0) {
  189. count = ng - offset;
  190. }
  191. int[] ga = new int [ count ];
  192. for (int i = offset, n = offset + count, k = 0; i < n; i++) {
  193. if (k < ga.length) {
  194. ga [ k++ ] = glyphs.get(i);
  195. }
  196. }
  197. return ga;
  198. }
  199. /**
  200. * Obtain array of glyphs. If <code>copy</code> is true, then
  201. * a newly instantiated array is returned, otherwise a reference to
  202. * the underlying buffer's array is returned. N.B. in case a reference
  203. * to the undelying buffer's array is returned, the length
  204. * of the array is not necessarily the number of glyphs in array.
  205. * To determine the number of glyphs, use {@link #getGlyphCount}.
  206. * @param copy true if to return a newly instantiated array of glyphs
  207. * @return array of glyphs
  208. */
  209. public int[] getGlyphArray(boolean copy) {
  210. if (copy) {
  211. return toArray(glyphs);
  212. } else {
  213. return glyphs.array();
  214. }
  215. }
  216. /**
  217. * Obtain the number of glyphs in glyphs array, where
  218. * each glyph constitutes a font specific glyph index.
  219. * @return number of glyphs available in character array
  220. */
  221. public int getGlyphCount() {
  222. return glyphs.limit();
  223. }
  224. /**
  225. * Obtain association at specified index.
  226. * @param index into associations array
  227. * @return glyph to character associations at specified index
  228. * @throws IndexOutOfBoundsException if index is less than zero
  229. * or exceeds last valid position
  230. */
  231. public CharAssociation getAssociation(int index) throws IndexOutOfBoundsException {
  232. return (CharAssociation) associations.get(index);
  233. }
  234. /**
  235. * Obtain reference to underlying associations list.
  236. * @return associations list
  237. */
  238. public List getAssociations() {
  239. return associations;
  240. }
  241. /**
  242. * Obtain count associations starting at offset.
  243. * @param offset into glyph sequence
  244. * @param count of associations to obtain starting at offset, or negative,
  245. * indicating all avaialble associations starting at offset
  246. * @return associations
  247. */
  248. public CharAssociation[] getAssociations(int offset, int count) {
  249. int ng = getGlyphCount();
  250. if (offset < 0) {
  251. offset = 0;
  252. } else if (offset > ng) {
  253. offset = ng;
  254. }
  255. if (count < 0) {
  256. count = ng - offset;
  257. }
  258. CharAssociation[] aa = new CharAssociation [ count ];
  259. for (int i = offset, n = offset + count, k = 0; i < n; i++) {
  260. if (k < aa.length) {
  261. aa [ k++ ] = (CharAssociation) associations.get(i);
  262. }
  263. }
  264. return aa;
  265. }
  266. /**
  267. * Enable or disable predications.
  268. * @param enable true if predications are to be enabled; otherwise false to disable
  269. */
  270. public void setPredications(boolean enable) {
  271. this.predications = enable;
  272. }
  273. /**
  274. * Obtain predications state.
  275. * @return true if predications are enabled
  276. */
  277. public boolean getPredications() {
  278. return this.predications;
  279. }
  280. /**
  281. * Set predication <KEY,VALUE> at glyph sequence OFFSET.
  282. * @param offset offset (index) into glyph sequence
  283. * @param key predication key
  284. * @param value predication value
  285. */
  286. public void setPredication(int offset, String key, Object value) {
  287. if (predications) {
  288. CharAssociation[] aa = getAssociations(offset, 1);
  289. CharAssociation ca = aa[0];
  290. ca.setPredication(key, value);
  291. }
  292. }
  293. /**
  294. * Get predication KEY at glyph sequence OFFSET.
  295. * @param offset offset (index) into glyph sequence
  296. * @param key predication key
  297. * @return predication KEY at OFFSET or null if none exists
  298. */
  299. public Object getPredication(int offset, String key) {
  300. if (predications) {
  301. CharAssociation[] aa = getAssociations(offset, 1);
  302. CharAssociation ca = aa[0];
  303. return ca.getPredication(key);
  304. } else {
  305. return null;
  306. }
  307. }
  308. /**
  309. * Compare glyphs.
  310. * @param gb buffer containing glyph indices with which this glyph sequence's glyphs are to be compared
  311. * @return zero if glyphs are the same, otherwise returns 1 or -1 according to whether this glyph sequence's
  312. * glyphs are lexicographically greater or lesser than the glyphs in the specified string buffer
  313. */
  314. public int compareGlyphs(IntBuffer gb) {
  315. int ng = getGlyphCount();
  316. for (int i = 0, n = gb.limit(); i < n; i++) {
  317. if (i < ng) {
  318. int g1 = glyphs.get(i);
  319. int g2 = gb.get(i);
  320. if (g1 > g2) {
  321. return 1;
  322. } else if (g1 < g2) {
  323. return -1;
  324. }
  325. } else {
  326. return -1; // this gb is a proper prefix of specified gb
  327. }
  328. }
  329. return 0; // same lengths with no difference
  330. }
  331. /** {@inheritDoc} */
  332. public Object clone() {
  333. try {
  334. GlyphSequence gs = (GlyphSequence) super.clone();
  335. gs.characters = copyBuffer(characters);
  336. gs.glyphs = copyBuffer(glyphs);
  337. gs.associations = copyAssociations(associations);
  338. return gs;
  339. } catch (CloneNotSupportedException e) {
  340. return null;
  341. }
  342. }
  343. /** {@inheritDoc} */
  344. public String toString() {
  345. StringBuffer sb = new StringBuffer();
  346. sb.append('{');
  347. sb.append("chars = [");
  348. sb.append(characters);
  349. sb.append("], glyphs = [");
  350. sb.append(glyphs);
  351. sb.append("], associations = [");
  352. sb.append(associations);
  353. sb.append("]");
  354. sb.append('}');
  355. return sb.toString();
  356. }
  357. /**
  358. * Determine if two arrays of glyphs are identical.
  359. * @param ga1 first glyph array
  360. * @param ga2 second glyph array
  361. * @return true if arrays are botth null or both non-null and have identical elements
  362. */
  363. public static boolean sameGlyphs(int[] ga1, int[] ga2) {
  364. if (ga1 == ga2) {
  365. return true;
  366. } else if ((ga1 == null) || (ga2 == null)) {
  367. return false;
  368. } else if (ga1.length != ga2.length) {
  369. return false;
  370. } else {
  371. for (int i = 0, n = ga1.length; i < n; i++) {
  372. if (ga1[i] != ga2[i]) {
  373. return false;
  374. }
  375. }
  376. return true;
  377. }
  378. }
  379. /**
  380. * Concatenante glyph arrays.
  381. * @param bga backtrack glyph array
  382. * @param iga input glyph array
  383. * @param lga lookahead glyph array
  384. * @return new integer buffer containing concatenated glyphs
  385. */
  386. public static IntBuffer concatGlyphs(int[] bga, int[] iga, int[] lga) {
  387. int ng = 0;
  388. if (bga != null) {
  389. ng += bga.length;
  390. }
  391. if (iga != null) {
  392. ng += iga.length;
  393. }
  394. if (lga != null) {
  395. ng += lga.length;
  396. }
  397. IntBuffer gb = IntBuffer.allocate(ng);
  398. if (bga != null) {
  399. gb.put(bga);
  400. }
  401. if (iga != null) {
  402. gb.put(iga);
  403. }
  404. if (lga != null) {
  405. gb.put(lga);
  406. }
  407. gb.flip();
  408. return gb;
  409. }
  410. /**
  411. * Concatenante association arrays.
  412. * @param baa backtrack association array
  413. * @param iaa input association array
  414. * @param laa lookahead association array
  415. * @return new list containing concatenated associations
  416. */
  417. public static List concatAssociations(CharAssociation[] baa, CharAssociation[] iaa, CharAssociation[] laa) {
  418. int na = 0;
  419. if (baa != null) {
  420. na += baa.length;
  421. }
  422. if (iaa != null) {
  423. na += iaa.length;
  424. }
  425. if (laa != null) {
  426. na += laa.length;
  427. }
  428. if (na > 0) {
  429. List gl = new ArrayList(na);
  430. if (baa != null) {
  431. for (int i = 0; i < baa.length; i++) {
  432. gl.add(baa[i]);
  433. }
  434. }
  435. if (iaa != null) {
  436. for (int i = 0; i < iaa.length; i++) {
  437. gl.add(iaa[i]);
  438. }
  439. }
  440. if (laa != null) {
  441. for (int i = 0; i < laa.length; i++) {
  442. gl.add(laa[i]);
  443. }
  444. }
  445. return gl;
  446. } else {
  447. return null;
  448. }
  449. }
  450. /**
  451. * Join (concatenate) glyph sequences.
  452. * @param gs original glyph sequence from which to reuse character array reference
  453. * @param sa array of glyph sequences, whose glyph arrays and association lists are to be concatenated
  454. * @return new glyph sequence referring to character array of GS and concatenated glyphs and associations of SA
  455. */
  456. public static GlyphSequence join(GlyphSequence gs, GlyphSequence[] sa) {
  457. assert sa != null;
  458. int tg = 0;
  459. int ta = 0;
  460. for (int i = 0, n = sa.length; i < n; i++) {
  461. GlyphSequence s = sa [ i ];
  462. IntBuffer ga = s.getGlyphs();
  463. assert ga != null;
  464. int ng = ga.limit();
  465. List al = s.getAssociations();
  466. assert al != null;
  467. int na = al.size();
  468. assert na == ng;
  469. tg += ng;
  470. ta += na;
  471. }
  472. IntBuffer uga = IntBuffer.allocate(tg);
  473. ArrayList ual = new ArrayList(ta);
  474. for (int i = 0, n = sa.length; i < n; i++) {
  475. GlyphSequence s = sa [ i ];
  476. uga.put(s.getGlyphs());
  477. ual.addAll(s.getAssociations());
  478. }
  479. return new GlyphSequence(gs.getCharacters(), uga, ual, gs.getPredications());
  480. }
  481. /**
  482. * Reorder sequence such that [SOURCE,SOURCE+COUNT) is moved just prior to TARGET.
  483. * @param gs input sequence
  484. * @param source index of sub-sequence to reorder
  485. * @param count length of sub-sequence to reorder
  486. * @param target index to which source sub-sequence is to be moved
  487. * @return reordered sequence (or original if no reordering performed)
  488. */
  489. public static GlyphSequence reorder(GlyphSequence gs, int source, int count, int target) {
  490. if (source != target) {
  491. int ng = gs.getGlyphCount();
  492. int[] ga = gs.getGlyphArray(false);
  493. int[] nga = new int [ ng ];
  494. GlyphSequence.CharAssociation[] aa = gs.getAssociations(0, ng);
  495. GlyphSequence.CharAssociation[] naa = new GlyphSequence.CharAssociation [ ng ];
  496. if (source < target) {
  497. int t = 0;
  498. for (int s = 0, e = source; s < e; s++, t++) {
  499. nga[t] = ga[s];
  500. naa[t] = aa[s];
  501. }
  502. for (int s = source + count, e = target; s < e; s++, t++) {
  503. nga[t] = ga[s];
  504. naa[t] = aa[s];
  505. }
  506. for (int s = source, e = source + count; s < e; s++, t++) {
  507. nga[t] = ga[s];
  508. naa[t] = aa[s];
  509. }
  510. for (int s = target, e = ng; s < e; s++, t++) {
  511. nga[t] = ga[s];
  512. naa[t] = aa[s];
  513. }
  514. } else {
  515. int t = 0;
  516. for (int s = 0, e = target; s < e; s++, t++) {
  517. nga[t] = ga[s];
  518. naa[t] = aa[s];
  519. }
  520. for (int s = source, e = source + count; s < e; s++, t++) {
  521. nga[t] = ga[s];
  522. naa[t] = aa[s];
  523. }
  524. for (int s = target, e = source; s < e; s++, t++) {
  525. nga[t] = ga[s];
  526. naa[t] = aa[s];
  527. }
  528. for (int s = source + count, e = ng; s < e; s++, t++) {
  529. nga[t] = ga[s];
  530. naa[t] = aa[s];
  531. }
  532. }
  533. return new GlyphSequence(gs, null, nga, null, null, naa, null);
  534. } else {
  535. return gs;
  536. }
  537. }
  538. private static int[] toArray(IntBuffer ib) {
  539. if (ib != null) {
  540. int n = ib.limit();
  541. int[] ia = new int[n];
  542. ib.get(ia, 0, n);
  543. return ia;
  544. } else {
  545. return new int[0];
  546. }
  547. }
  548. private static List makeIdentityAssociations(int numChars, int numGlyphs) {
  549. int nc = numChars;
  550. int ng = numGlyphs;
  551. List av = new ArrayList(ng);
  552. for (int i = 0, n = ng; i < n; i++) {
  553. int k = (i > nc) ? nc : i;
  554. av.add(new CharAssociation(i, (k == nc) ? 0 : 1));
  555. }
  556. return av;
  557. }
  558. private static IntBuffer copyBuffer(IntBuffer ib) {
  559. if (ib != null) {
  560. int[] ia = new int [ ib.capacity() ];
  561. int p = ib.position();
  562. int l = ib.limit();
  563. System.arraycopy(ib.array(), 0, ia, 0, ia.length);
  564. return IntBuffer.wrap(ia, p, l - p);
  565. } else {
  566. return null;
  567. }
  568. }
  569. private static List copyAssociations(List ca) {
  570. if (ca != null) {
  571. return new ArrayList(ca);
  572. } else {
  573. return ca;
  574. }
  575. }
  576. /**
  577. * A structure class encapsulating an interval of characters
  578. * expressed as an offset and count of Unicode scalar values (in
  579. * an IntBuffer). A <code>CharAssociation</code> is used to
  580. * maintain a backpointer from a glyph to one or more character
  581. * intervals from which the glyph was derived.
  582. *
  583. * Each glyph in a glyph sequence is associated with a single
  584. * <code>CharAssociation</code> instance.
  585. *
  586. * A <code>CharAssociation</code> instance is additionally (and
  587. * optionally) used to record predication information about the
  588. * glyph, such as whether the glyph was produced by the
  589. * application of a specific substitution table or whether its
  590. * position was adjusted by a specific positioning table.
  591. */
  592. public static class CharAssociation implements Cloneable {
  // instance state
  /** Start of the association interval (returned by getOffset and getStart). */
  private final int offset;
  /** Length of the association interval (returned by getCount). */
  private final int count;
  /** Sub-interval boundaries when the association is disjoint, otherwise null. */
  private final int[] subIntervals;
  /** Predication key/value pairs; created lazily, so may be null. */
  private Map<String, Object> predications;
  // class state
  /** Registered predication mergers by key; created lazily, so may be null. */
  private static volatile Map<String, PredicationMerger> predicationMergers;
  /**
   * Strategy for combining two predication values registered under the same key.
   * Used by mergePredicationValues when a merger is registered for the key.
   */
  interface PredicationMerger {
      /**
       * Merge two predication values.
       * @param key predication key
       * @param v1 first (original) predication value
       * @param v2 second (to be merged) predication value
       * @return merged value
       */
      Object merge(String key, Object v1, Object v2);
  }
  /**
   * Instantiate a character association. The sub-intervals array is retained only if
   * it is non-null and has more than two members; otherwise the association is
   * treated as non-disjoint.
   * @param offset into array of Unicode scalar values (in associated IntBuffer)
   * @param count of Unicode scalar values (in associated IntBuffer)
   * @param subIntervals if disjoint, then array of sub-intervals, otherwise null; even
   * members of array are sub-interval starts, and odd members are sub-interval
   * ends (exclusive)
   */
  public CharAssociation(int offset, int count, int[] subIntervals) {
      this.offset = offset;
      this.count = count;
      if ((subIntervals == null) || (subIntervals.length <= 2)) {
          this.subIntervals = null;
      } else {
          this.subIntervals = subIntervals;
      }
  }
  /**
   * Instantiate a non-disjoint character association, i.e., one without sub-intervals.
   * @param offset start of the association interval
   * @param count length of the association interval
   */
  public CharAssociation(int offset, int count) {
      this(offset, count, null);
  }
  /**
   * Instantiate a (possibly disjoint) character association from sub-intervals. The
   * offset and count are derived from the sub-intervals by getSubIntervalsStart and
   * getSubIntervalsLength.
   * @param subIntervals if disjoint, then array of sub-intervals, otherwise null; even
   * members of array are sub-interval starts, and odd members are sub-interval
   * ends (exclusive)
   */
  public CharAssociation(int[] subIntervals) {
      this(
          getSubIntervalsStart(subIntervals),
          getSubIntervalsLength(subIntervals),
          subIntervals);
  }
  633. /** @return offset (start of association interval) */
  634. public int getOffset() {
  635. return offset;
  636. }
  637. /** @return count (number of characer codes in association) */
  638. public int getCount() {
  639. return count;
  640. }
  641. /** @return start of association interval */
  642. public int getStart() {
  643. return getOffset();
  644. }
  645. /** @return end of association interval */
  646. public int getEnd() {
  647. return getOffset() + getCount();
  648. }
  649. /** @return true if association is disjoint */
  650. public boolean isDisjoint() {
  651. return subIntervals != null;
  652. }
  653. /** @return subintervals of disjoint association */
  654. public int[] getSubIntervals() {
  655. return subIntervals;
  656. }
  657. /** @return count of subintervals of disjoint association */
  658. public int getSubIntervalCount() {
  659. return (subIntervals != null) ? (subIntervals.length / 2) : 0;
  660. }
  661. /**
  662. * @param offset of interval in sequence
  663. * @param count length of interval
  664. * @return true if this association is contained within [offset,offset+count)
  665. */
  666. public boolean contained(int offset, int count) {
  667. int s = offset;
  668. int e = offset + count;
  669. if (!isDisjoint()) {
  670. int s0 = getStart();
  671. int e0 = getEnd();
  672. return (s0 >= s) && (e0 <= e);
  673. } else {
  674. int ns = getSubIntervalCount();
  675. for (int i = 0; i < ns; i++) {
  676. int s0 = subIntervals [ 2 * i + 0 ];
  677. int e0 = subIntervals [ 2 * i + 1 ];
  678. if ((s0 >= s) && (e0 <= e)) {
  679. return true;
  680. }
  681. }
  682. return false;
  683. }
  684. }
  685. /**
  686. * Set predication <KEY,VALUE>.
  687. * @param key predication key
  688. * @param value predication value
  689. */
  690. public void setPredication(String key, Object value) {
  691. if (predications == null) {
  692. predications = new HashMap<String, Object>();
  693. }
  694. if (predications != null) {
  695. predications.put(key, value);
  696. }
  697. }
  698. /**
  699. * Get predication KEY.
  700. * @param key predication key
  701. * @return predication KEY at OFFSET or null if none exists
  702. */
  703. public Object getPredication(String key) {
  704. if (predications != null) {
  705. return predications.get(key);
  706. } else {
  707. return null;
  708. }
  709. }
  710. /**
  711. * Merge predication <KEY,VALUE>.
  712. * @param key predication key
  713. * @param value predication value
  714. */
  715. public void mergePredication(String key, Object value) {
  716. if (predications == null) {
  717. predications = new HashMap<String, Object>();
  718. }
  719. if (predications != null) {
  720. if (predications.containsKey(key)) {
  721. Object v1 = predications.get(key);
  722. Object v2 = value;
  723. predications.put(key, mergePredicationValues(key, v1, v2));
  724. } else {
  725. predications.put(key, value);
  726. }
  727. }
  728. }
  729. /**
  730. * Merge predication values V1 and V2 on KEY. Uses registered <code>PredicationMerger</code>
  731. * if one exists, otherwise uses V2 if non-null, otherwise uses V1.
  732. * @param key predication key
  733. * @param v1 first (original) predication value
  734. * @param v2 second (to be merged) predication value
  735. * @return merged value
  736. */
  737. public static Object mergePredicationValues(String key, Object v1, Object v2) {
  738. PredicationMerger pm = getPredicationMerger(key);
  739. if (pm != null) {
  740. return pm.merge(key, v1, v2);
  741. } else if (v2 != null) {
  742. return v2;
  743. } else {
  744. return v1;
  745. }
  746. }
  747. /**
  748. * Merge predications from another CA.
  749. * @param ca from which to merge
  750. */
  751. public void mergePredications(CharAssociation ca) {
  752. if (ca.predications != null) {
  753. for (Map.Entry<String, Object> e : ca.predications.entrySet()) {
  754. mergePredication(e.getKey(), e.getValue());
  755. }
  756. }
  757. }
  758. /** {@inheritDoc} */
  759. public Object clone() {
  760. try {
  761. CharAssociation ca = (CharAssociation) super.clone();
  762. if (predications != null) {
  763. ca.predications = new HashMap<String, Object>(predications);
  764. }
  765. return ca;
  766. } catch (CloneNotSupportedException e) {
  767. return null;
  768. }
  769. }
  770. /**
  771. * Register predication merger PM for KEY.
  772. * @param key for predication merger
  773. * @param pm predication merger
  774. */
  775. public static void setPredicationMerger(String key, PredicationMerger pm) {
  776. if (predicationMergers == null) {
  777. predicationMergers = new HashMap<String, PredicationMerger>();
  778. }
  779. if (predicationMergers != null) {
  780. predicationMergers.put(key, pm);
  781. }
  782. }
  783. /**
  784. * Obtain predication merger for KEY.
  785. * @param key for predication merger
  786. * @return predication merger or null if none exists
  787. */
  788. public static PredicationMerger getPredicationMerger(String key) {
  789. if (predicationMergers != null) {
  790. return predicationMergers.get(key);
  791. } else {
  792. return null;
  793. }
  794. }
  795. /**
  796. * Replicate association to form <code>repeat</code> new associations.
  797. * @param a association to replicate
  798. * @param repeat count
  799. * @return array of replicated associations
  800. */
  801. public static CharAssociation[] replicate(CharAssociation a, int repeat) {
  802. CharAssociation[] aa = new CharAssociation [ repeat ];
  803. for (int i = 0, n = aa.length; i < n; i++) {
  804. aa [ i ] = (CharAssociation) a.clone();
  805. }
  806. return aa;
  807. }
  808. /**
  809. * Join (merge) multiple associations into a single, potentially disjoint
  810. * association.
  811. * @param aa array of associations to join
  812. * @return (possibly disjoint) association containing joined associations
  813. */
  814. public static CharAssociation join(CharAssociation[] aa) {
  815. CharAssociation ca;
  816. // extract sorted intervals
  817. int[] ia = extractIntervals(aa);
  818. if ((ia == null) || (ia.length == 0)) {
  819. ca = new CharAssociation(0, 0);
  820. } else if (ia.length == 2) {
  821. int s = ia[0];
  822. int e = ia[1];
  823. ca = new CharAssociation(s, e - s);
  824. } else {
  825. ca = new CharAssociation(mergeIntervals(ia));
  826. }
  827. return mergePredicates(ca, aa);
  828. }
  829. private static CharAssociation mergePredicates(CharAssociation ca, CharAssociation[] aa) {
  830. for (CharAssociation a : aa) {
  831. ca.mergePredications(a);
  832. }
  833. return ca;
  834. }
  835. private static int getSubIntervalsStart(int[] ia) {
  836. int us = Integer.MAX_VALUE;
  837. int ue = Integer.MIN_VALUE;
  838. if (ia != null) {
  839. for (int i = 0, n = ia.length; i < n; i += 2) {
  840. int s = ia [ i + 0 ];
  841. int e = ia [ i + 1 ];
  842. if (s < us) {
  843. us = s;
  844. }
  845. if (e > ue) {
  846. ue = e;
  847. }
  848. }
  849. if (ue < 0) {
  850. ue = 0;
  851. }
  852. if (us > ue) {
  853. us = ue;
  854. }
  855. }
  856. return us;
  857. }
  858. private static int getSubIntervalsLength(int[] ia) {
  859. int us = Integer.MAX_VALUE;
  860. int ue = Integer.MIN_VALUE;
  861. if (ia != null) {
  862. for (int i = 0, n = ia.length; i < n; i += 2) {
  863. int s = ia [ i + 0 ];
  864. int e = ia [ i + 1 ];
  865. if (s < us) {
  866. us = s;
  867. }
  868. if (e > ue) {
  869. ue = e;
  870. }
  871. }
  872. if (ue < 0) {
  873. ue = 0;
  874. }
  875. if (us > ue) {
  876. us = ue;
  877. }
  878. }
  879. return ue - us;
  880. }
  881. /**
  882. * Extract sorted sub-intervals.
  883. */
  884. private static int[] extractIntervals(CharAssociation[] aa) {
  885. int ni = 0;
  886. for (int i = 0, n = aa.length; i < n; i++) {
  887. CharAssociation a = aa [ i ];
  888. if (a.isDisjoint()) {
  889. ni += a.getSubIntervalCount();
  890. } else {
  891. ni += 1;
  892. }
  893. }
  894. int[] sa = new int [ ni ];
  895. int[] ea = new int [ ni ];
  896. for (int i = 0, k = 0; i < aa.length; i++) {
  897. CharAssociation a = aa [ i ];
  898. if (a.isDisjoint()) {
  899. int[] da = a.getSubIntervals();
  900. for (int j = 0; j < da.length; j += 2) {
  901. sa [ k ] = da [ j + 0 ];
  902. ea [ k ] = da [ j + 1 ];
  903. k++;
  904. }
  905. } else {
  906. sa [ k ] = a.getStart();
  907. ea [ k ] = a.getEnd();
  908. k++;
  909. }
  910. }
  911. return sortIntervals(sa, ea);
  912. }
  913. private static final int[] SORT_INCREMENTS_16
  914. = { 1391376, 463792, 198768, 86961, 33936, 13776, 4592, 1968, 861, 336, 112, 48, 21, 7, 3, 1 };
  915. private static final int[] SORT_INCREMENTS_03
  916. = { 7, 3, 1 };
  917. /**
  918. * Sort sub-intervals using modified Shell Sort.
  919. */
  920. private static int[] sortIntervals(int[] sa, int[] ea) {
  921. assert sa != null;
  922. assert ea != null;
  923. assert sa.length == ea.length;
  924. int ni = sa.length;
  925. int[] incr = (ni < 21) ? SORT_INCREMENTS_03 : SORT_INCREMENTS_16;
  926. for (int k = 0; k < incr.length; k++) {
  927. for (int h = incr [ k ], i = h, n = ni, j; i < n; i++) {
  928. int s1 = sa [ i ];
  929. int e1 = ea [ i ];
  930. for (j = i; j >= h; j -= h) {
  931. int s2 = sa [ j - h ];
  932. int e2 = ea [ j - h ];
  933. if (s2 > s1) {
  934. sa [ j ] = s2;
  935. ea [ j ] = e2;
  936. } else if ((s2 == s1) && (e2 > e1)) {
  937. sa [ j ] = s2;
  938. ea [ j ] = e2;
  939. } else {
  940. break;
  941. }
  942. }
  943. sa [ j ] = s1;
  944. ea [ j ] = e1;
  945. }
  946. }
  947. int[] ia = new int [ ni * 2 ];
  948. for (int i = 0; i < ni; i++) {
  949. ia [ (i * 2) + 0 ] = sa [ i ];
  950. ia [ (i * 2) + 1 ] = ea [ i ];
  951. }
  952. return ia;
  953. }
  954. /**
  955. * Merge overlapping and abutting sub-intervals.
  956. */
  957. private static int[] mergeIntervals(int[] ia) {
  958. int ni = ia.length;
  959. int i;
  960. int n;
  961. int nm;
  962. int is;
  963. int ie;
  964. // count merged sub-intervals
  965. for (i = 0, n = ni, nm = 0, is = ie = -1; i < n; i += 2) {
  966. int s = ia [ i + 0 ];
  967. int e = ia [ i + 1 ];
  968. if ((ie < 0) || (s > ie)) {
  969. is = s;
  970. ie = e;
  971. nm++;
  972. } else if (s >= is) {
  973. if (e > ie) {
  974. ie = e;
  975. }
  976. }
  977. }
  978. int[] mi = new int [ nm * 2 ];
  979. // populate merged sub-intervals
  980. for (i = 0, n = ni, nm = 0, is = ie = -1; i < n; i += 2) {
  981. int s = ia [ i + 0 ];
  982. int e = ia [ i + 1 ];
  983. int k = nm * 2;
  984. if ((ie < 0) || (s > ie)) {
  985. is = s;
  986. ie = e;
  987. mi [ k + 0 ] = is;
  988. mi [ k + 1 ] = ie;
  989. nm++;
  990. } else if (s >= is) {
  991. if (e > ie) {
  992. ie = e;
  993. }
  994. mi [ k - 1 ] = ie;
  995. }
  996. }
  997. return mi;
  998. }
  999. }
  1000. }