You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

FOText.java 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.fo;
  19. import java.awt.Color;
  20. import java.nio.CharBuffer;
  21. import java.util.NoSuchElementException;
  22. import org.xml.sax.Locator;
  23. import org.apache.fop.apps.FOPException;
  24. import org.apache.fop.datatypes.Length;
  25. import org.apache.fop.fo.flow.Block;
  26. import org.apache.fop.fo.properties.CommonFont;
  27. import org.apache.fop.fo.properties.CommonHyphenation;
  28. import org.apache.fop.fo.properties.CommonTextDecoration;
  29. import org.apache.fop.fo.properties.KeepProperty;
  30. import org.apache.fop.fo.properties.Property;
  31. import org.apache.fop.fo.properties.SpaceProperty;
  32. import org.apache.fop.util.CharUtilities;
  33. /**
  34. * A text node (PCDATA) in the formatting object tree.
  35. */
  36. public class FOText extends FONode implements CharSequence {
  37. /** the <code>CharBuffer</code> containing the text */
  38. private CharBuffer charBuffer;
  39. /** properties relevant for #PCDATA */
  40. private CommonFont commonFont;
  41. private CommonHyphenation commonHyphenation;
  42. private Color color;
  43. private KeepProperty keepTogether;
  44. private Property letterSpacing;
  45. private SpaceProperty lineHeight;
  46. private int whiteSpaceTreatment;
  47. private int whiteSpaceCollapse;
  48. private int textTransform;
  49. private Property wordSpacing;
  50. private int wrapOption;
  51. private Length baselineShift;
  52. /**
  53. * Points to the previous FOText object created within the current
  54. * block. If this is "null", this is the first such object.
  55. */
  56. private FOText prevFOTextThisBlock = null;
  57. /**
  58. * Points to the next FOText object created within the current
  59. * block. If this is "null", this is the last such object.
  60. */
  61. private FOText nextFOTextThisBlock = null;
  62. /**
  63. * Points to the ancestor Block object. This is used to keep track of
  64. * which FOText nodes are descendants of the same block.
  65. */
  66. private Block ancestorBlock = null;
  67. /** Holds the text decoration values. May be null */
  68. private CommonTextDecoration textDecoration;
  69. private static final int IS_WORD_CHAR_FALSE = 0;
  70. private static final int IS_WORD_CHAR_TRUE = 1;
  71. private static final int IS_WORD_CHAR_MAYBE = 2;
  72. /**
  73. * Creates a new FO text node.
  74. *
  75. * @param parent FONode that is the parent of this object
  76. */
  77. public FOText(FONode parent) {
  78. super(parent);
  79. }
  80. /** {@inheritDoc} */
  81. protected void characters(char[] data, int start, int length,
  82. PropertyList list, Locator locator) throws FOPException {
  83. if (this.charBuffer == null) {
  84. // buffer not yet initialized, do so now
  85. this.charBuffer = CharBuffer.allocate(length);
  86. } else {
  87. // allocate a larger buffer, and transfer contents
  88. int newLength = this.charBuffer.limit() + length;
  89. CharBuffer newBuffer = CharBuffer.allocate(newLength);
  90. this.charBuffer.rewind();
  91. newBuffer.put(this.charBuffer);
  92. this.charBuffer = newBuffer;
  93. }
  94. // append characters
  95. this.charBuffer.put(data, start, length);
  96. }
  97. /**
  98. * Return the array of characters for this instance.
  99. *
  100. * @return a char array containing the text
  101. */
  102. public char[] getCharArray() {
  103. if (this.charBuffer == null) {
  104. return null;
  105. }
  106. if (this.charBuffer.hasArray()) {
  107. return this.charBuffer.array();
  108. }
  109. // only if the buffer implementation has
  110. // no accessible backing array, return a new one
  111. char[] ca = new char[this.charBuffer.limit()];
  112. this.charBuffer.rewind();
  113. this.charBuffer.get(ca);
  114. return ca;
  115. }
  116. /** {@inheritDoc} */
  117. public FONode clone(FONode parent, boolean removeChildren)
  118. throws FOPException {
  119. FOText ft = (FOText) super.clone(parent, removeChildren);
  120. if (removeChildren) {
  121. // not really removing, just make sure the char buffer
  122. // pointed to is really a different one
  123. if (this.charBuffer != null) {
  124. ft.charBuffer = CharBuffer.allocate(this.charBuffer.limit());
  125. this.charBuffer.rewind();
  126. ft.charBuffer.put(this.charBuffer);
  127. ft.charBuffer.rewind();
  128. }
  129. }
  130. ft.prevFOTextThisBlock = null;
  131. ft.nextFOTextThisBlock = null;
  132. ft.ancestorBlock = null;
  133. return ft;
  134. }
  135. /** {@inheritDoc} */
  136. public void bind(PropertyList pList) throws FOPException {
  137. this.commonFont = pList.getFontProps();
  138. this.commonHyphenation = pList.getHyphenationProps();
  139. this.color = pList.get(Constants.PR_COLOR).getColor(getUserAgent());
  140. this.keepTogether = pList.get(Constants.PR_KEEP_TOGETHER).getKeep();
  141. this.lineHeight = pList.get(Constants.PR_LINE_HEIGHT).getSpace();
  142. this.letterSpacing = pList.get(Constants.PR_LETTER_SPACING);
  143. this.whiteSpaceCollapse = pList.get(Constants.PR_WHITE_SPACE_COLLAPSE).getEnum();
  144. this.whiteSpaceTreatment = pList.get(Constants.PR_WHITE_SPACE_TREATMENT).getEnum();
  145. this.textTransform = pList.get(Constants.PR_TEXT_TRANSFORM).getEnum();
  146. this.wordSpacing = pList.get(Constants.PR_WORD_SPACING);
  147. this.wrapOption = pList.get(Constants.PR_WRAP_OPTION).getEnum();
  148. this.textDecoration = pList.getTextDecorationProps();
  149. this.baselineShift = pList.get(Constants.PR_BASELINE_SHIFT).getLength();
  150. }
  151. /** {@inheritDoc} */
  152. protected void endOfNode() throws FOPException {
  153. super.endOfNode();
  154. getFOEventHandler().characters(
  155. this.getCharArray(), 0, this.charBuffer.limit());
  156. }
  157. /** {@inheritDoc} */
  158. public void finalizeNode() {
  159. textTransform();
  160. }
  161. /**
  162. * Check if this text node will create an area.
  163. * This means either there is non-whitespace or it is
  164. * preserved whitespace.
  165. * Maybe this just needs to check length > 0, since char iterators
  166. * handle whitespace.
  167. *
  168. * @return true if this will create an area in the output
  169. */
  170. public boolean willCreateArea() {
  171. if (whiteSpaceCollapse == Constants.EN_FALSE
  172. && this.charBuffer.limit() > 0) {
  173. return true;
  174. }
  175. char ch;
  176. this.charBuffer.rewind();
  177. while (this.charBuffer.hasRemaining()) {
  178. ch = this.charBuffer.get();
  179. if (!((ch == CharUtilities.SPACE)
  180. || (ch == CharUtilities.LINEFEED_CHAR)
  181. || (ch == CharUtilities.CARRIAGE_RETURN)
  182. || (ch == CharUtilities.TAB))) {
  183. // not whitespace
  184. this.charBuffer.rewind();
  185. return true;
  186. }
  187. }
  188. return false;
  189. }
  190. /**
  191. * @return a new TextCharIterator
  192. */
  193. public CharIterator charIterator() {
  194. return new TextCharIterator();
  195. }
  196. /**
  197. * This method is run as part of the ancestor Block's flushText(), to
  198. * create xref pointers to the previous FOText objects within the same Block
  199. * @param ancestorBlock the ancestor fo:block
  200. */
  201. protected void createBlockPointers(Block ancestorBlock) {
  202. this.ancestorBlock = ancestorBlock;
  203. // if the last FOText is a sibling, point to it, and have it point here
  204. if (ancestorBlock.lastFOTextProcessed != null) {
  205. if (ancestorBlock.lastFOTextProcessed.ancestorBlock
  206. == this.ancestorBlock) {
  207. prevFOTextThisBlock = ancestorBlock.lastFOTextProcessed;
  208. prevFOTextThisBlock.nextFOTextThisBlock = this;
  209. } else {
  210. prevFOTextThisBlock = null;
  211. }
  212. }
  213. }
  214. /**
  215. * This method is run as part of endOfNode(), to handle the
  216. * text-transform property for accumulated FOText
  217. */
  218. private void textTransform() {
  219. if (getBuilderContext().inMarker()
  220. || textTransform == Constants.EN_NONE) {
  221. return;
  222. }
  223. this.charBuffer.rewind();
  224. CharBuffer tmp = this.charBuffer.slice();
  225. char c;
  226. int lim = this.charBuffer.limit();
  227. int pos = -1;
  228. while (++pos < lim) {
  229. c = this.charBuffer.get();
  230. switch (textTransform) {
  231. case Constants.EN_UPPERCASE:
  232. tmp.put(Character.toUpperCase(c));
  233. break;
  234. case Constants.EN_LOWERCASE:
  235. tmp.put(Character.toLowerCase(c));
  236. break;
  237. case Constants.EN_CAPITALIZE:
  238. if (isStartOfWord(pos)) {
  239. /*
  240. Use toTitleCase here. Apparently, some languages use
  241. a different character to represent a letter when using
  242. initial caps than when all of the letters in the word
  243. are capitalized. We will try to let Java handle this.
  244. */
  245. tmp.put(Character.toTitleCase(c));
  246. } else {
  247. tmp.put(c);
  248. }
  249. break;
  250. default:
  251. //should never happen as the property subsystem catches that case
  252. assert false;
  253. //nop
  254. }
  255. }
  256. }
  257. /**
  258. * Determines whether a particular location in an FOText object's text is
  259. * the start of a new "word". The use of "word" here is specifically for
  260. * the text-transform property, but may be useful for other things as
  261. * well, such as word-spacing. The definition of "word" is somewhat ambiguous
  262. * and appears to be definable by the user agent.
  263. *
  264. * @param i index into charBuffer
  265. *
  266. * @return True if the character at this location is the start of a new
  267. * word.
  268. */
  269. private boolean isStartOfWord(int i) {
  270. char prevChar = getRelativeCharInBlock(i, -1);
  271. /* All we are really concerned about here is of what type prevChar
  272. * is. If inputChar is not part of a word, then the Java
  273. * conversions will (we hope) simply return inputChar.
  274. */
  275. switch (isWordChar(prevChar)) {
  276. case IS_WORD_CHAR_TRUE:
  277. return false;
  278. case IS_WORD_CHAR_FALSE:
  279. return true;
  280. /* "MAYBE" implies that additional context is needed. An example is a
  281. * single-quote, either straight or closing, which might be interpreted
  282. * as a possessive or a contraction, or might be a closing quote.
  283. */
  284. case IS_WORD_CHAR_MAYBE:
  285. char prevPrevChar = getRelativeCharInBlock(i, -2);
  286. switch (isWordChar(prevPrevChar)) {
  287. case IS_WORD_CHAR_TRUE:
  288. return false;
  289. case IS_WORD_CHAR_FALSE:
  290. return true;
  291. case IS_WORD_CHAR_MAYBE:
  292. return true;
  293. default:
  294. return false;
  295. }
  296. default:
  297. return false;
  298. }
  299. }
  300. /**
  301. * Finds a character within the current Block that is relative in location
  302. * to a character in the current FOText. Treats all FOText objects within a
  303. * block as one unit, allowing text in adjoining FOText objects to be
  304. * returned if the parameters are outside of the current object.
  305. *
  306. * @param i index into the CharBuffer
  307. * @param offset signed integer with relative position within the
  308. * block of the character to return. To return the character immediately
  309. * preceding i, pass -1. To return the character immediately after i,
  310. * pass 1.
  311. * @return the character in the offset position within the block; \u0000 if
  312. * the offset points to an area outside of the block.
  313. */
  314. private char getRelativeCharInBlock(int i, int offset) {
  315. int charIndex = i + offset;
  316. // The easy case is where the desired character is in the same FOText
  317. if (charIndex >= 0 && charIndex < this.length()) {
  318. return this.charAt(i + offset);
  319. }
  320. // For now, we can't look at following FOText nodes
  321. if (offset > 0) {
  322. return CharUtilities.NULL_CHAR;
  323. }
  324. // Remaining case has the text in some previous FOText node
  325. boolean foundChar = false;
  326. char charToReturn = CharUtilities.NULL_CHAR;
  327. FOText nodeToTest = this;
  328. int remainingOffset = offset + i;
  329. while (!foundChar) {
  330. if (nodeToTest.prevFOTextThisBlock == null) {
  331. break;
  332. }
  333. nodeToTest = nodeToTest.prevFOTextThisBlock;
  334. int diff = nodeToTest.length() + remainingOffset - 1;
  335. if (diff >= 0) {
  336. charToReturn = nodeToTest.charAt(diff);
  337. foundChar = true;
  338. } else {
  339. remainingOffset += diff;
  340. }
  341. }
  342. return charToReturn;
  343. }
  344. /**
  345. * @return The previous FOText node in this Block; null, if this is the
  346. * first FOText in this Block.
  347. */
  348. public FOText getPrevFOTextThisBlock () {
  349. return prevFOTextThisBlock;
  350. }
  351. /**
  352. * @return The next FOText node in this Block; null if this is the last
  353. * FOText in this Block; null if subsequent FOText nodes have not yet been
  354. * processed.
  355. */
  356. public FOText getNextFOTextThisBlock () {
  357. return nextFOTextThisBlock;
  358. }
  359. /**
  360. * @return The nearest ancestor block object which contains this FOText.
  361. */
  362. public Block getAncestorBlock () {
  363. return ancestorBlock;
  364. }
  365. /**
  366. * Determines whether the input char should be considered part of a
  367. * "word". This is used primarily to determine whether the character
  368. * immediately following starts a new word, but may have other uses.
  369. * We have not found a definition of "word" in the standard (1.0), so the
  370. * logic used here is based on the programmer's best guess.
  371. *
  372. * @param inputChar the character to be tested.
  373. * @return int IS_WORD_CHAR_TRUE, IS_WORD_CHAR_FALSE, or IS_WORD_CHAR_MAYBE,
  374. * depending on whether the character should be considered part of a word
  375. * or not.
  376. */
  377. private static int isWordChar(char inputChar) {
  378. switch (Character.getType(inputChar)) {
  379. case Character.COMBINING_SPACING_MARK:
  380. return IS_WORD_CHAR_TRUE;
  381. case Character.CONNECTOR_PUNCTUATION:
  382. return IS_WORD_CHAR_TRUE;
  383. case Character.CONTROL:
  384. return IS_WORD_CHAR_FALSE;
  385. case Character.CURRENCY_SYMBOL:
  386. return IS_WORD_CHAR_TRUE;
  387. case Character.DASH_PUNCTUATION:
  388. if (inputChar == '-') {
  389. return IS_WORD_CHAR_TRUE; //hyphen
  390. }
  391. return IS_WORD_CHAR_FALSE;
  392. case Character.DECIMAL_DIGIT_NUMBER:
  393. return IS_WORD_CHAR_TRUE;
  394. case Character.ENCLOSING_MARK:
  395. return IS_WORD_CHAR_FALSE;
  396. case Character.END_PUNCTUATION:
  397. if (inputChar == '\u2019') {
  398. return IS_WORD_CHAR_MAYBE; //apostrophe, right single quote
  399. }
  400. return IS_WORD_CHAR_FALSE;
  401. case Character.FORMAT:
  402. return IS_WORD_CHAR_FALSE;
  403. case Character.LETTER_NUMBER:
  404. return IS_WORD_CHAR_TRUE;
  405. case Character.LINE_SEPARATOR:
  406. return IS_WORD_CHAR_FALSE;
  407. case Character.LOWERCASE_LETTER:
  408. return IS_WORD_CHAR_TRUE;
  409. case Character.MATH_SYMBOL:
  410. return IS_WORD_CHAR_FALSE;
  411. case Character.MODIFIER_LETTER:
  412. return IS_WORD_CHAR_TRUE;
  413. case Character.MODIFIER_SYMBOL:
  414. return IS_WORD_CHAR_TRUE;
  415. case Character.NON_SPACING_MARK:
  416. return IS_WORD_CHAR_TRUE;
  417. case Character.OTHER_LETTER:
  418. return IS_WORD_CHAR_TRUE;
  419. case Character.OTHER_NUMBER:
  420. return IS_WORD_CHAR_TRUE;
  421. case Character.OTHER_PUNCTUATION:
  422. if (inputChar == '\'') {
  423. return IS_WORD_CHAR_MAYBE; //ASCII apostrophe
  424. }
  425. return IS_WORD_CHAR_FALSE;
  426. case Character.OTHER_SYMBOL:
  427. return IS_WORD_CHAR_TRUE;
  428. case Character.PARAGRAPH_SEPARATOR:
  429. return IS_WORD_CHAR_FALSE;
  430. case Character.PRIVATE_USE:
  431. return IS_WORD_CHAR_FALSE;
  432. case Character.SPACE_SEPARATOR:
  433. return IS_WORD_CHAR_FALSE;
  434. case Character.START_PUNCTUATION:
  435. return IS_WORD_CHAR_FALSE;
  436. case Character.SURROGATE:
  437. return IS_WORD_CHAR_FALSE;
  438. case Character.TITLECASE_LETTER:
  439. return IS_WORD_CHAR_TRUE;
  440. case Character.UNASSIGNED:
  441. return IS_WORD_CHAR_FALSE;
  442. case Character.UPPERCASE_LETTER:
  443. return IS_WORD_CHAR_TRUE;
  444. default:
  445. return IS_WORD_CHAR_FALSE;
  446. }
  447. }
  448. private class TextCharIterator extends CharIterator {
  449. private int currentPosition = 0;
  450. private boolean canRemove = false;
  451. private boolean canReplace = false;
  452. /** {@inheritDoc} */
  453. public boolean hasNext() {
  454. return (this.currentPosition < charBuffer.limit());
  455. }
  456. /** {@inheritDoc} */
  457. public char nextChar() {
  458. if (this.currentPosition < charBuffer.limit()) {
  459. this.canRemove = true;
  460. this.canReplace = true;
  461. return charBuffer.get(currentPosition++);
  462. } else {
  463. throw new NoSuchElementException();
  464. }
  465. }
  466. /** {@inheritDoc} */
  467. public void remove() {
  468. if (this.canRemove) {
  469. charBuffer.position(currentPosition);
  470. // Slice the buffer at the current position
  471. CharBuffer tmp = charBuffer.slice();
  472. // Reset position to before current character
  473. charBuffer.position(--currentPosition);
  474. if (tmp.hasRemaining()) {
  475. // Transfer any remaining characters
  476. charBuffer.mark();
  477. charBuffer.put(tmp);
  478. charBuffer.reset();
  479. }
  480. // Decrease limit
  481. charBuffer.limit(charBuffer.limit() - 1);
  482. // Make sure following calls fail, unless nextChar() was called
  483. this.canRemove = false;
  484. } else {
  485. throw new IllegalStateException();
  486. }
  487. }
  488. /** {@inheritDoc} */
  489. public void replaceChar(char c) {
  490. if (this.canReplace) {
  491. charBuffer.put(currentPosition - 1, c);
  492. } else {
  493. throw new IllegalStateException();
  494. }
  495. }
  496. }
  497. /**
  498. * @return the Common Font Properties.
  499. */
  500. public CommonFont getCommonFont() {
  501. return commonFont;
  502. }
  503. /**
  504. * @return the Common Hyphenation Properties.
  505. */
  506. public CommonHyphenation getCommonHyphenation() {
  507. return commonHyphenation;
  508. }
  509. /**
  510. * @return the "color" property.
  511. */
  512. public Color getColor() {
  513. return color;
  514. }
  515. /**
  516. * @return the "keep-together" property.
  517. */
  518. public KeepProperty getKeepTogether() {
  519. return keepTogether;
  520. }
  521. /**
  522. * @return the "letter-spacing" property.
  523. */
  524. public Property getLetterSpacing() {
  525. return letterSpacing;
  526. }
  527. /**
  528. * @return the "line-height" property.
  529. */
  530. public SpaceProperty getLineHeight() {
  531. return lineHeight;
  532. }
  533. /**
  534. * @return the "white-space-treatment" property
  535. */
  536. public int getWhitespaceTreatment() {
  537. return whiteSpaceTreatment;
  538. }
  539. /**
  540. * @return the "word-spacing" property.
  541. */
  542. public Property getWordSpacing() {
  543. return wordSpacing;
  544. }
  545. /**
  546. * @return the "wrap-option" property.
  547. */
  548. public int getWrapOption() {
  549. return wrapOption;
  550. }
  551. /** @return the "text-decoration" property. */
  552. public CommonTextDecoration getTextDecoration() {
  553. return textDecoration;
  554. }
  555. /** @return the baseline-shift property */
  556. public Length getBaselineShift() {
  557. return baselineShift;
  558. }
  559. /** {@inheritDoc} */
  560. public String toString() {
  561. return (this.charBuffer == null) ? "" : this.charBuffer.toString();
  562. }
  563. /** {@inheritDoc} */
  564. public String getLocalName() {
  565. return "#PCDATA";
  566. }
  567. /** {@inheritDoc} */
  568. public String getNormalNamespacePrefix() {
  569. return null;
  570. }
  571. /** {@inheritDoc} */
  572. protected String gatherContextInfo() {
  573. if (this.locator != null) {
  574. return super.gatherContextInfo();
  575. } else {
  576. return this.toString();
  577. }
  578. }
  579. /** {@inheritDoc} */
  580. public char charAt(int position) {
  581. return this.charBuffer.get(position);
  582. }
  583. /** {@inheritDoc} */
  584. public CharSequence subSequence(int start, int end) {
  585. return this.charBuffer.subSequence(start, end);
  586. }
  587. /** {@inheritDoc} */
  588. public int length() {
  589. return this.charBuffer.limit();
  590. }
  591. /**
  592. * Resets the backing <code>java.nio.CharBuffer</code>
  593. */
  594. public void resetBuffer() {
  595. if (this.charBuffer != null) {
  596. this.charBuffer.rewind();
  597. }
  598. }
  599. }