You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

FOText.java 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.fo;
  19. import java.awt.Color;
  20. import java.nio.CharBuffer;
  21. import java.util.NoSuchElementException;
  22. import org.xml.sax.Locator;
  23. import org.apache.fop.accessibility.StructureTreeElement;
  24. import org.apache.fop.apps.FOPException;
  25. import org.apache.fop.datatypes.Length;
  26. import org.apache.fop.fo.flow.Block;
  27. import org.apache.fop.fo.properties.CommonFont;
  28. import org.apache.fop.fo.properties.CommonHyphenation;
  29. import org.apache.fop.fo.properties.CommonTextDecoration;
  30. import org.apache.fop.fo.properties.KeepProperty;
  31. import org.apache.fop.fo.properties.Property;
  32. import org.apache.fop.fo.properties.SpaceProperty;
  33. import org.apache.fop.util.CharUtilities;
  34. /**
  35. * A text node (PCDATA) in the formatting object tree.
  36. */
  37. public class FOText extends FONode implements CharSequence {
  38. /** the <code>CharBuffer</code> containing the text */
  39. private CharBuffer charBuffer;
  40. /** properties relevant for #PCDATA */
  41. private CommonFont commonFont;
  42. private CommonHyphenation commonHyphenation;
  43. private Color color;
  44. private KeepProperty keepTogether;
  45. private Property letterSpacing;
  46. private SpaceProperty lineHeight;
  47. private int whiteSpaceTreatment;
  48. private int whiteSpaceCollapse;
  49. private int textTransform;
  50. private Property wordSpacing;
  51. private int wrapOption;
  52. private Length baselineShift;
  53. /**
  54. * Points to the previous FOText object created within the current
  55. * block. If this is "null", this is the first such object.
  56. */
  57. private FOText prevFOTextThisBlock = null;
  58. /**
  59. * Points to the next FOText object created within the current
  60. * block. If this is "null", this is the last such object.
  61. */
  62. private FOText nextFOTextThisBlock = null;
  63. /**
  64. * Points to the ancestor Block object. This is used to keep track of
  65. * which FOText nodes are descendants of the same block.
  66. */
  67. private Block ancestorBlock = null;
  68. /** Holds the text decoration values. May be null */
  69. private CommonTextDecoration textDecoration;
  70. private StructureTreeElement structureTreeElement;
  71. private static final int IS_WORD_CHAR_FALSE = 0;
  72. private static final int IS_WORD_CHAR_TRUE = 1;
  73. private static final int IS_WORD_CHAR_MAYBE = 2;
  74. /**
  75. * Creates a new FO text node.
  76. *
  77. * @param parent FONode that is the parent of this object
  78. */
  79. public FOText(FONode parent) {
  80. super(parent);
  81. }
  82. /** {@inheritDoc} */
  83. protected void characters(char[] data, int start, int length,
  84. PropertyList list, Locator locator) throws FOPException {
  85. if (this.charBuffer == null) {
  86. // buffer not yet initialized, do so now
  87. this.charBuffer = CharBuffer.allocate(length);
  88. } else {
  89. // allocate a larger buffer, and transfer contents
  90. int newLength = this.charBuffer.limit() + length;
  91. CharBuffer newBuffer = CharBuffer.allocate(newLength);
  92. this.charBuffer.rewind();
  93. newBuffer.put(this.charBuffer);
  94. this.charBuffer = newBuffer;
  95. }
  96. // append characters
  97. this.charBuffer.put(data, start, length);
  98. }
  99. /**
  100. * Return the array of characters for this instance.
  101. *
  102. * @return a char sequence containing the text
  103. */
  104. public CharSequence getCharSequence() {
  105. if (this.charBuffer == null) {
  106. return null;
  107. }
  108. this.charBuffer.rewind();
  109. return this.charBuffer.asReadOnlyBuffer().subSequence(0, this.charBuffer.limit());
  110. }
  111. /** {@inheritDoc} */
  112. public FONode clone(FONode parent, boolean removeChildren)
  113. throws FOPException {
  114. FOText ft = (FOText) super.clone(parent, removeChildren);
  115. if (removeChildren) {
  116. // not really removing, just make sure the char buffer
  117. // pointed to is really a different one
  118. if (this.charBuffer != null) {
  119. ft.charBuffer = CharBuffer.allocate(this.charBuffer.limit());
  120. this.charBuffer.rewind();
  121. ft.charBuffer.put(this.charBuffer);
  122. ft.charBuffer.rewind();
  123. }
  124. }
  125. ft.prevFOTextThisBlock = null;
  126. ft.nextFOTextThisBlock = null;
  127. ft.ancestorBlock = null;
  128. return ft;
  129. }
  130. /** {@inheritDoc} */
  131. public void bind(PropertyList pList) throws FOPException {
  132. this.commonFont = pList.getFontProps();
  133. this.commonHyphenation = pList.getHyphenationProps();
  134. this.color = pList.get(Constants.PR_COLOR).getColor(getUserAgent());
  135. this.keepTogether = pList.get(Constants.PR_KEEP_TOGETHER).getKeep();
  136. this.lineHeight = pList.get(Constants.PR_LINE_HEIGHT).getSpace();
  137. this.letterSpacing = pList.get(Constants.PR_LETTER_SPACING);
  138. this.whiteSpaceCollapse = pList.get(Constants.PR_WHITE_SPACE_COLLAPSE).getEnum();
  139. this.whiteSpaceTreatment = pList.get(Constants.PR_WHITE_SPACE_TREATMENT).getEnum();
  140. this.textTransform = pList.get(Constants.PR_TEXT_TRANSFORM).getEnum();
  141. this.wordSpacing = pList.get(Constants.PR_WORD_SPACING);
  142. this.wrapOption = pList.get(Constants.PR_WRAP_OPTION).getEnum();
  143. this.textDecoration = pList.getTextDecorationProps();
  144. this.baselineShift = pList.get(Constants.PR_BASELINE_SHIFT).getLength();
  145. }
  146. /** {@inheritDoc} */
  147. protected void endOfNode() throws FOPException {
  148. super.endOfNode();
  149. getFOEventHandler().characters(this);
  150. }
  151. /** {@inheritDoc} */
  152. public void finalizeNode() {
  153. textTransform();
  154. }
  155. /**
  156. * Check if this text node will create an area.
  157. * This means either there is non-whitespace or it is
  158. * preserved whitespace.
  159. * Maybe this just needs to check length > 0, since char iterators
  160. * handle whitespace.
  161. *
  162. * @return true if this will create an area in the output
  163. */
  164. public boolean willCreateArea() {
  165. if (whiteSpaceCollapse == Constants.EN_FALSE
  166. && this.charBuffer.limit() > 0) {
  167. return true;
  168. }
  169. char ch;
  170. this.charBuffer.rewind();
  171. while (this.charBuffer.hasRemaining()) {
  172. ch = this.charBuffer.get();
  173. if (!((ch == CharUtilities.SPACE)
  174. || (ch == CharUtilities.LINEFEED_CHAR)
  175. || (ch == CharUtilities.CARRIAGE_RETURN)
  176. || (ch == CharUtilities.TAB))) {
  177. // not whitespace
  178. this.charBuffer.rewind();
  179. return true;
  180. }
  181. }
  182. return false;
  183. }
  184. /**
  185. * @return a new TextCharIterator
  186. */
  187. public CharIterator charIterator() {
  188. return new TextCharIterator();
  189. }
  190. /**
  191. * This method is run as part of the ancestor Block's flushText(), to
  192. * create xref pointers to the previous FOText objects within the same Block
  193. * @param ancestorBlock the ancestor fo:block
  194. */
  195. protected void createBlockPointers(Block ancestorBlock) {
  196. this.ancestorBlock = ancestorBlock;
  197. // if the last FOText is a sibling, point to it, and have it point here
  198. if (ancestorBlock.lastFOTextProcessed != null) {
  199. if (ancestorBlock.lastFOTextProcessed.ancestorBlock
  200. == this.ancestorBlock) {
  201. prevFOTextThisBlock = ancestorBlock.lastFOTextProcessed;
  202. prevFOTextThisBlock.nextFOTextThisBlock = this;
  203. } else {
  204. prevFOTextThisBlock = null;
  205. }
  206. }
  207. }
  208. /**
  209. * This method is run as part of endOfNode(), to handle the
  210. * text-transform property for accumulated FOText
  211. */
  212. private void textTransform() {
  213. if (getBuilderContext().inMarker()
  214. || textTransform == Constants.EN_NONE) {
  215. return;
  216. }
  217. this.charBuffer.rewind();
  218. CharBuffer tmp = this.charBuffer.slice();
  219. char c;
  220. int lim = this.charBuffer.limit();
  221. int pos = -1;
  222. while (++pos < lim) {
  223. c = this.charBuffer.get();
  224. switch (textTransform) {
  225. case Constants.EN_UPPERCASE:
  226. tmp.put(Character.toUpperCase(c));
  227. break;
  228. case Constants.EN_LOWERCASE:
  229. tmp.put(Character.toLowerCase(c));
  230. break;
  231. case Constants.EN_CAPITALIZE:
  232. if (isStartOfWord(pos)) {
  233. /*
  234. Use toTitleCase here. Apparently, some languages use
  235. a different character to represent a letter when using
  236. initial caps than when all of the letters in the word
  237. are capitalized. We will try to let Java handle this.
  238. */
  239. tmp.put(Character.toTitleCase(c));
  240. } else {
  241. tmp.put(c);
  242. }
  243. break;
  244. default:
  245. //should never happen as the property subsystem catches that case
  246. assert false;
  247. //nop
  248. }
  249. }
  250. }
  251. /**
  252. * Determines whether a particular location in an FOText object's text is
  253. * the start of a new "word". The use of "word" here is specifically for
  254. * the text-transform property, but may be useful for other things as
  255. * well, such as word-spacing. The definition of "word" is somewhat ambiguous
  256. * and appears to be definable by the user agent.
  257. *
  258. * @param i index into charBuffer
  259. *
  260. * @return True if the character at this location is the start of a new
  261. * word.
  262. */
  263. private boolean isStartOfWord(int i) {
  264. char prevChar = getRelativeCharInBlock(i, -1);
  265. /* All we are really concerned about here is of what type prevChar
  266. * is. If inputChar is not part of a word, then the Java
  267. * conversions will (we hope) simply return inputChar.
  268. */
  269. switch (isWordChar(prevChar)) {
  270. case IS_WORD_CHAR_TRUE:
  271. return false;
  272. case IS_WORD_CHAR_FALSE:
  273. return true;
  274. /* "MAYBE" implies that additional context is needed. An example is a
  275. * single-quote, either straight or closing, which might be interpreted
  276. * as a possessive or a contraction, or might be a closing quote.
  277. */
  278. case IS_WORD_CHAR_MAYBE:
  279. char prevPrevChar = getRelativeCharInBlock(i, -2);
  280. switch (isWordChar(prevPrevChar)) {
  281. case IS_WORD_CHAR_TRUE:
  282. return false;
  283. case IS_WORD_CHAR_FALSE:
  284. return true;
  285. case IS_WORD_CHAR_MAYBE:
  286. return true;
  287. default:
  288. return false;
  289. }
  290. default:
  291. return false;
  292. }
  293. }
  294. /**
  295. * Finds a character within the current Block that is relative in location
  296. * to a character in the current FOText. Treats all FOText objects within a
  297. * block as one unit, allowing text in adjoining FOText objects to be
  298. * returned if the parameters are outside of the current object.
  299. *
  300. * @param i index into the CharBuffer
  301. * @param offset signed integer with relative position within the
  302. * block of the character to return. To return the character immediately
  303. * preceding i, pass -1. To return the character immediately after i,
  304. * pass 1.
  305. * @return the character in the offset position within the block; \u0000 if
  306. * the offset points to an area outside of the block.
  307. */
  308. private char getRelativeCharInBlock(int i, int offset) {
  309. int charIndex = i + offset;
  310. // The easy case is where the desired character is in the same FOText
  311. if (charIndex >= 0 && charIndex < this.length()) {
  312. return this.charAt(i + offset);
  313. }
  314. // For now, we can't look at following FOText nodes
  315. if (offset > 0) {
  316. return CharUtilities.NULL_CHAR;
  317. }
  318. // Remaining case has the text in some previous FOText node
  319. boolean foundChar = false;
  320. char charToReturn = CharUtilities.NULL_CHAR;
  321. FOText nodeToTest = this;
  322. int remainingOffset = offset + i;
  323. while (!foundChar) {
  324. if (nodeToTest.prevFOTextThisBlock == null) {
  325. break;
  326. }
  327. nodeToTest = nodeToTest.prevFOTextThisBlock;
  328. int diff = nodeToTest.length() + remainingOffset - 1;
  329. if (diff >= 0) {
  330. charToReturn = nodeToTest.charAt(diff);
  331. foundChar = true;
  332. } else {
  333. remainingOffset += diff;
  334. }
  335. }
  336. return charToReturn;
  337. }
  338. /**
  339. * @return The previous FOText node in this Block; null, if this is the
  340. * first FOText in this Block.
  341. */
  342. public FOText getPrevFOTextThisBlock () {
  343. return prevFOTextThisBlock;
  344. }
  345. /**
  346. * @return The next FOText node in this Block; null if this is the last
  347. * FOText in this Block; null if subsequent FOText nodes have not yet been
  348. * processed.
  349. */
  350. public FOText getNextFOTextThisBlock () {
  351. return nextFOTextThisBlock;
  352. }
  353. /**
  354. * @return The nearest ancestor block object which contains this FOText.
  355. */
  356. public Block getAncestorBlock () {
  357. return ancestorBlock;
  358. }
  359. /**
  360. * Determines whether the input char should be considered part of a
  361. * "word". This is used primarily to determine whether the character
  362. * immediately following starts a new word, but may have other uses.
  363. * We have not found a definition of "word" in the standard (1.0), so the
  364. * logic used here is based on the programmer's best guess.
  365. *
  366. * @param inputChar the character to be tested.
  367. * @return int IS_WORD_CHAR_TRUE, IS_WORD_CHAR_FALSE, or IS_WORD_CHAR_MAYBE,
  368. * depending on whether the character should be considered part of a word
  369. * or not.
  370. */
  371. private static int isWordChar(char inputChar) {
  372. switch (Character.getType(inputChar)) {
  373. case Character.COMBINING_SPACING_MARK:
  374. return IS_WORD_CHAR_TRUE;
  375. case Character.CONNECTOR_PUNCTUATION:
  376. return IS_WORD_CHAR_TRUE;
  377. case Character.CONTROL:
  378. return IS_WORD_CHAR_FALSE;
  379. case Character.CURRENCY_SYMBOL:
  380. return IS_WORD_CHAR_TRUE;
  381. case Character.DASH_PUNCTUATION:
  382. if (inputChar == '-') {
  383. return IS_WORD_CHAR_TRUE; //hyphen
  384. }
  385. return IS_WORD_CHAR_FALSE;
  386. case Character.DECIMAL_DIGIT_NUMBER:
  387. return IS_WORD_CHAR_TRUE;
  388. case Character.ENCLOSING_MARK:
  389. return IS_WORD_CHAR_FALSE;
  390. case Character.END_PUNCTUATION:
  391. if (inputChar == '\u2019') {
  392. return IS_WORD_CHAR_MAYBE; //apostrophe, right single quote
  393. }
  394. return IS_WORD_CHAR_FALSE;
  395. case Character.FORMAT:
  396. return IS_WORD_CHAR_FALSE;
  397. case Character.LETTER_NUMBER:
  398. return IS_WORD_CHAR_TRUE;
  399. case Character.LINE_SEPARATOR:
  400. return IS_WORD_CHAR_FALSE;
  401. case Character.LOWERCASE_LETTER:
  402. return IS_WORD_CHAR_TRUE;
  403. case Character.MATH_SYMBOL:
  404. return IS_WORD_CHAR_FALSE;
  405. case Character.MODIFIER_LETTER:
  406. return IS_WORD_CHAR_TRUE;
  407. case Character.MODIFIER_SYMBOL:
  408. return IS_WORD_CHAR_TRUE;
  409. case Character.NON_SPACING_MARK:
  410. return IS_WORD_CHAR_TRUE;
  411. case Character.OTHER_LETTER:
  412. return IS_WORD_CHAR_TRUE;
  413. case Character.OTHER_NUMBER:
  414. return IS_WORD_CHAR_TRUE;
  415. case Character.OTHER_PUNCTUATION:
  416. if (inputChar == '\'') {
  417. return IS_WORD_CHAR_MAYBE; //ASCII apostrophe
  418. }
  419. return IS_WORD_CHAR_FALSE;
  420. case Character.OTHER_SYMBOL:
  421. return IS_WORD_CHAR_TRUE;
  422. case Character.PARAGRAPH_SEPARATOR:
  423. return IS_WORD_CHAR_FALSE;
  424. case Character.PRIVATE_USE:
  425. return IS_WORD_CHAR_FALSE;
  426. case Character.SPACE_SEPARATOR:
  427. return IS_WORD_CHAR_FALSE;
  428. case Character.START_PUNCTUATION:
  429. return IS_WORD_CHAR_FALSE;
  430. case Character.SURROGATE:
  431. return IS_WORD_CHAR_FALSE;
  432. case Character.TITLECASE_LETTER:
  433. return IS_WORD_CHAR_TRUE;
  434. case Character.UNASSIGNED:
  435. return IS_WORD_CHAR_FALSE;
  436. case Character.UPPERCASE_LETTER:
  437. return IS_WORD_CHAR_TRUE;
  438. default:
  439. return IS_WORD_CHAR_FALSE;
  440. }
  441. }
  442. private class TextCharIterator extends CharIterator {
  443. private int currentPosition = 0;
  444. private boolean canRemove = false;
  445. private boolean canReplace = false;
  446. /** {@inheritDoc} */
  447. public boolean hasNext() {
  448. return (this.currentPosition < charBuffer.limit());
  449. }
  450. /** {@inheritDoc} */
  451. public char nextChar() {
  452. if (this.currentPosition < charBuffer.limit()) {
  453. this.canRemove = true;
  454. this.canReplace = true;
  455. return charBuffer.get(currentPosition++);
  456. } else {
  457. throw new NoSuchElementException();
  458. }
  459. }
  460. /** {@inheritDoc} */
  461. public void remove() {
  462. if (this.canRemove) {
  463. charBuffer.position(currentPosition);
  464. // Slice the buffer at the current position
  465. CharBuffer tmp = charBuffer.slice();
  466. // Reset position to before current character
  467. charBuffer.position(--currentPosition);
  468. if (tmp.hasRemaining()) {
  469. // Transfer any remaining characters
  470. charBuffer.mark();
  471. charBuffer.put(tmp);
  472. charBuffer.reset();
  473. }
  474. // Decrease limit
  475. charBuffer.limit(charBuffer.limit() - 1);
  476. // Make sure following calls fail, unless nextChar() was called
  477. this.canRemove = false;
  478. } else {
  479. throw new IllegalStateException();
  480. }
  481. }
  482. /** {@inheritDoc} */
  483. public void replaceChar(char c) {
  484. if (this.canReplace) {
  485. charBuffer.put(currentPosition - 1, c);
  486. } else {
  487. throw new IllegalStateException();
  488. }
  489. }
  490. }
  491. /**
  492. * @return the Common Font Properties.
  493. */
  494. public CommonFont getCommonFont() {
  495. return commonFont;
  496. }
  497. /**
  498. * @return the Common Hyphenation Properties.
  499. */
  500. public CommonHyphenation getCommonHyphenation() {
  501. return commonHyphenation;
  502. }
  503. /**
  504. * @return the "color" property.
  505. */
  506. public Color getColor() {
  507. return color;
  508. }
  509. /**
  510. * @return the "keep-together" property.
  511. */
  512. public KeepProperty getKeepTogether() {
  513. return keepTogether;
  514. }
  515. /**
  516. * @return the "letter-spacing" property.
  517. */
  518. public Property getLetterSpacing() {
  519. return letterSpacing;
  520. }
  521. /**
  522. * @return the "line-height" property.
  523. */
  524. public SpaceProperty getLineHeight() {
  525. return lineHeight;
  526. }
  527. /**
  528. * @return the "white-space-treatment" property
  529. */
  530. public int getWhitespaceTreatment() {
  531. return whiteSpaceTreatment;
  532. }
  533. /**
  534. * @return the "word-spacing" property.
  535. */
  536. public Property getWordSpacing() {
  537. return wordSpacing;
  538. }
  539. /**
  540. * @return the "wrap-option" property.
  541. */
  542. public int getWrapOption() {
  543. return wrapOption;
  544. }
  545. /** @return the "text-decoration" property. */
  546. public CommonTextDecoration getTextDecoration() {
  547. return textDecoration;
  548. }
  549. /** @return the baseline-shift property */
  550. public Length getBaseLineShift() {
  551. return baselineShift;
  552. }
  553. /** {@inheritDoc} */
  554. public String toString() {
  555. return (this.charBuffer == null) ? "" : this.charBuffer.toString();
  556. }
  557. /** {@inheritDoc} */
  558. public String getLocalName() {
  559. return "#PCDATA";
  560. }
  561. /** {@inheritDoc} */
  562. public String getNormalNamespacePrefix() {
  563. return null;
  564. }
  565. /** {@inheritDoc} */
  566. protected String gatherContextInfo() {
  567. if (this.locator != null) {
  568. return super.gatherContextInfo();
  569. } else {
  570. return this.toString();
  571. }
  572. }
  573. /** {@inheritDoc} */
  574. public char charAt(int position) {
  575. return this.charBuffer.get(position);
  576. }
  577. /** {@inheritDoc} */
  578. public CharSequence subSequence(int start, int end) {
  579. return this.charBuffer.subSequence(start, end);
  580. }
  581. /** {@inheritDoc} */
  582. public int length() {
  583. return this.charBuffer.limit();
  584. }
  585. /**
  586. * Resets the backing <code>java.nio.CharBuffer</code>
  587. */
  588. public void resetBuffer() {
  589. if (this.charBuffer != null) {
  590. this.charBuffer.rewind();
  591. }
  592. }
  593. @Override
  594. public void setStructureTreeElement(StructureTreeElement structureTreeElement) {
  595. this.structureTreeElement = structureTreeElement;
  596. }
  597. public StructureTreeElement getStructureTreeElement() {
  598. return structureTreeElement;
  599. }
  600. }