123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.xssf.usermodel;
-
- import java.util.*;
- import java.util.regex.Pattern;
- import java.util.regex.Matcher;
-
- import javax.xml.namespace.QName;
-
- import org.apache.poi.ss.usermodel.Font;
- import org.apache.poi.ss.usermodel.RichTextString;
- import org.apache.poi.xssf.model.StylesTable;
- import org.apache.poi.xssf.model.ThemesTable;
- import org.apache.poi.util.Internal;
- import org.apache.xmlbeans.XmlCursor;
- import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTColor;
- import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTFont;
- import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRElt;
- import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRPrElt;
- import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
- import org.openxmlformats.schemas.spreadsheetml.x2006.main.STXstring;
-
-
- /**
- * Rich text unicode string. These strings can have fonts applied to arbitrary parts of the string.
- *
- * <p>
- * Most strings in a workbook have formatting applied at the cell level, that is, the entire string in the cell has the
- * same formatting applied. In these cases, the formatting for the cell is stored in the styles part,
- * and the string for the cell can be shared across the workbook. The following code illustrates the example.
- * </p>
- *
- * <blockquote>
- * <pre>
- * cell1.setCellValue(new XSSFRichTextString("Apache POI"));
- * cell2.setCellValue(new XSSFRichTextString("Apache POI"));
- * cell3.setCellValue(new XSSFRichTextString("Apache POI"));
- * </pre>
- * </blockquote>
- * In the above example all three cells will use the same string cached on workbook level.
- *
- * <p>
- * Some strings in the workbook may have formatting applied at a level that is more granular than the cell level.
- * For instance, specific characters within the string may be bolded, have coloring, italicizing, etc.
- * In these cases, the formatting is stored along with the text in the string table, and is treated as
- * a unique entry in the workbook. The following code snippet illustrates this.
- * </p>
- *
- * <blockquote>
- * <pre>
- * XSSFRichTextString s1 = new XSSFRichTextString("Apache POI");
- * s1.applyFont(boldArial);
- * cell1.setCellValue(s1);
- *
- * XSSFRichTextString s2 = new XSSFRichTextString("Apache POI");
- * s2.applyFont(italicCourier);
- * cell2.setCellValue(s2);
- * </pre>
- * </blockquote>
- */
- public class XSSFRichTextString implements RichTextString {
- private static final Pattern utfPtrn = Pattern.compile("_x([0-9A-Fa-f]{4})_");
-
- private CTRst st;
- private StylesTable styles;
-
/**
 * Creates an unformatted rich text string holding the given text.
 *
 * @param str the initial text; if it starts or ends with whitespace the text
 *            element is marked so that the whitespace is preserved
 */
public XSSFRichTextString(String str) {
    this.st = CTRst.Factory.newInstance();
    this.st.setT(str);
    preserveSpaces(this.st.xgetT());
}
-
/**
 * Creates a rich text string backed by a freshly created XML bean.
 * No text element is set by this constructor.
 */
public XSSFRichTextString() {
    this.st = CTRst.Factory.newInstance();
}
-
/**
 * Wraps an existing XML bean. The bean is stored by reference, not copied.
 *
 * @param st the underlying rich text bean
 */
public XSSFRichTextString(CTRst st) {
    this.st = st;
}
-
- /**
- * Applies a font to the specified characters of a string.
- *
- * @param startIndex The start index to apply the font to (inclusive)
- * @param endIndex The end index to apply the font to (exclusive)
- * @param fontIndex The font to use.
- */
- public void applyFont(int startIndex, int endIndex, short fontIndex) {
- XSSFFont font;
- if(styles == null) {
- //style table is not set, remember fontIndex and set the run properties later,
- //when setStylesTableReference is called
- font = new XSSFFont();
- font.setFontName("#" + fontIndex);
- } else {
- font = styles.getFontAt(fontIndex);
- }
- applyFont(startIndex, endIndex, font);
- }
-
- /**
- * Applies a font to the specified characters of a string.
- *
- * @param startIndex The start index to apply the font to (inclusive)
- * @param endIndex The end index to apply to font to (exclusive)
- * @param font The index of the font to use.
- */
- public void applyFont(int startIndex, int endIndex, Font font) {
- if (startIndex > endIndex)
- throw new IllegalArgumentException("Start index must be less than end index, but had " + startIndex + " and " + endIndex);
- if (startIndex < 0 || endIndex > length())
- throw new IllegalArgumentException("Start and end index not in range, but had " + startIndex + " and " + endIndex);
-
- if (startIndex == endIndex)
- return;
-
- if(st.sizeOfRArray() == 0 && st.isSetT()) {
- //convert <t>string</t> into a text run: <r><t>string</t></r>
- st.addNewR().setT(st.getT());
- st.unsetT();
- }
-
- String text = getString();
- XSSFFont xssfFont = (XSSFFont)font;
-
- TreeMap<Integer, CTRPrElt> formats = getFormatMap(st);
- CTRPrElt fmt = CTRPrElt.Factory.newInstance();
- setRunAttributes(xssfFont.getCTFont(), fmt);
- applyFont(formats, startIndex, endIndex, fmt);
-
- CTRst newSt = buildCTRst(text, formats);
- st.set(newSt);
- }
-
- /**
- * Sets the font of the entire string.
- * @param font The font to use.
- */
- public void applyFont(Font font) {
- String text = getString();
- applyFont(0, text.length(), font);
- }
-
- /**
- * Applies the specified font to the entire string.
- *
- * @param fontIndex the font to apply.
- */
- public void applyFont(short fontIndex) {
- XSSFFont font;
- if(styles == null) {
- font = new XSSFFont();
- font.setFontName("#" + fontIndex);
- } else {
- font = styles.getFontAt(fontIndex);
- }
- String text = getString();
- applyFont(0, text.length(), font);
- }
-
- /**
- * Append new text to this text run and apply the specify font to it
- *
- * @param text the text to append
- * @param font the font to apply to the appended text or <code>null</code> if no formatting is required
- */
- public void append(String text, XSSFFont font){
- if(st.sizeOfRArray() == 0 && st.isSetT()) {
- //convert <t>string</t> into a text run: <r><t>string</t></r>
- CTRElt lt = st.addNewR();
- lt.setT(st.getT());
- preserveSpaces(lt.xgetT());
- st.unsetT();
- }
- CTRElt lt = st.addNewR();
- lt.setT(text);
- preserveSpaces(lt.xgetT());
-
- if (font != null) {
- CTRPrElt pr = lt.addNewRPr();
- setRunAttributes(font.getCTFont(), pr);
- }
- }
-
- /**
- * Append new text to this text run
- *
- * @param text the text to append
- */
- public void append(String text){
- append(text, null);
- }
-
- /**
- * Copy font attributes from CTFont bean into CTRPrElt bean
- */
- private void setRunAttributes(CTFont ctFont, CTRPrElt pr){
- if(ctFont.sizeOfBArray() > 0) pr.addNewB().setVal(ctFont.getBArray(0).getVal());
- if(ctFont.sizeOfUArray() > 0) pr.addNewU().setVal(ctFont.getUArray(0).getVal());
- if(ctFont.sizeOfIArray() > 0) pr.addNewI().setVal(ctFont.getIArray(0).getVal());
- if(ctFont.sizeOfColorArray() > 0) {
- CTColor c1 = ctFont.getColorArray(0);
- CTColor c2 = pr.addNewColor();
- if(c1.isSetAuto()) c2.setAuto(c1.getAuto());
- if(c1.isSetIndexed()) c2.setIndexed(c1.getIndexed());
- if(c1.isSetRgb()) c2.setRgb(c1.getRgb());
- if(c1.isSetTheme()) c2.setTheme(c1.getTheme());
- if(c1.isSetTint()) c2.setTint(c1.getTint());
- }
- if(ctFont.sizeOfSzArray() > 0) pr.addNewSz().setVal(ctFont.getSzArray(0).getVal());
- if(ctFont.sizeOfNameArray() > 0) pr.addNewRFont().setVal(ctFont.getNameArray(0).getVal());
- if(ctFont.sizeOfFamilyArray() > 0) pr.addNewFamily().setVal(ctFont.getFamilyArray(0).getVal());
- if(ctFont.sizeOfSchemeArray() > 0) pr.addNewScheme().setVal(ctFont.getSchemeArray(0).getVal());
- if(ctFont.sizeOfCharsetArray() > 0) pr.addNewCharset().setVal(ctFont.getCharsetArray(0).getVal());
- if(ctFont.sizeOfCondenseArray() > 0) pr.addNewCondense().setVal(ctFont.getCondenseArray(0).getVal());
- if(ctFont.sizeOfExtendArray() > 0) pr.addNewExtend().setVal(ctFont.getExtendArray(0).getVal());
- if(ctFont.sizeOfVertAlignArray() > 0) pr.addNewVertAlign().setVal(ctFont.getVertAlignArray(0).getVal());
- if(ctFont.sizeOfOutlineArray() > 0) pr.addNewOutline().setVal(ctFont.getOutlineArray(0).getVal());
- if(ctFont.sizeOfShadowArray() > 0) pr.addNewShadow().setVal(ctFont.getShadowArray(0).getVal());
- if(ctFont.sizeOfStrikeArray() > 0) pr.addNewStrike().setVal(ctFont.getStrikeArray(0).getVal());
- }
-
- /**
- * Does this string have any explicit formatting applied, or is
- * it just text in the default style?
- */
- public boolean hasFormatting() {
- //noinspection deprecation - for performance reasons!
- CTRElt[] rs = st.getRArray();
- if (rs == null || rs.length == 0) {
- return false;
- }
- for (CTRElt r : rs) {
- if (r.isSetRPr()) return true;
- }
- return false;
- }
-
- /**
- * Removes any formatting that may have been applied to the string.
- */
- public void clearFormatting() {
- String text = getString();
- st.setRArray(null);
- st.setT(text);
- }
-
- /**
- * The index within the string to which the specified formatting run applies.
- *
- * @param index the index of the formatting run
- * @return the index within the string.
- */
- public int getIndexOfFormattingRun(int index) {
- if(st.sizeOfRArray() == 0) return 0;
-
- int pos = 0;
- for(int i = 0; i < st.sizeOfRArray(); i++){
- CTRElt r = st.getRArray(i);
- if(i == index) return pos;
-
- pos += r.getT().length();
- }
- return -1;
- }
-
- /**
- * Returns the number of characters this format run covers.
- *
- * @param index the index of the formatting run
- * @return the number of characters this format run covers
- */
- public int getLengthOfFormattingRun(int index) {
- if(st.sizeOfRArray() == 0 || index >= st.sizeOfRArray()) {
- return -1;
- }
-
- CTRElt r = st.getRArray(index);
- return r.getT().length();
- }
-
- /**
- * Returns the plain string representation.
- */
- public String getString() {
- if(st.sizeOfRArray() == 0) {
- return utfDecode(st.getT());
- }
- StringBuilder buf = new StringBuilder();
- //noinspection deprecation - for performance reasons!
- for(CTRElt r : st.getRArray()){
- buf.append(r.getT());
- }
- return utfDecode(buf.toString());
- }
-
- /**
- * Removes any formatting and sets new string value
- *
- * @param s new string value
- */
- public void setString(String s){
- clearFormatting();
- st.setT(s);
- preserveSpaces(st.xgetT());
- }
-
- /**
- * Returns the plain string representation.
- */
- public String toString() {
- return getString();
- }
-
- /**
- * Returns the number of characters in this string.
- */
- public int length() {
- return getString().length();
- }
-
- /**
- * @return The number of formatting runs used.
- */
- public int numFormattingRuns() {
- return st.sizeOfRArray();
- }
-
- /**
- * Gets a copy of the font used in a particular formatting run.
- *
- * @param index the index of the formatting run
- * @return A copy of the font used or null if no formatting is applied to the specified text run.
- */
- public XSSFFont getFontOfFormattingRun(int index) {
- if(st.sizeOfRArray() == 0 || index >= st.sizeOfRArray()) {
- return null;
- }
-
- CTRElt r = st.getRArray(index);
- if(r.getRPr() != null) {
- XSSFFont fnt = new XSSFFont(toCTFont(r.getRPr()));
- fnt.setThemesTable(getThemesTable());
- return fnt;
- }
-
- return null;
- }
-
- /**
- * Return a copy of the font in use at a particular index.
- *
- * @param index The index.
- * @return A copy of the font that's currently being applied at that
- * index or null if no font is being applied or the
- * index is out of range.
- */
- public XSSFFont getFontAtIndex( int index ) {
- final ThemesTable themes = getThemesTable();
- int pos = 0;
- //noinspection deprecation - for performance reasons!
- for(CTRElt r : st.getRArray()){
- final int length = r.getT().length();
- if(index >= pos && index < pos + length) {
- XSSFFont fnt = new XSSFFont(toCTFont(r.getRPr()));
- fnt.setThemesTable(themes);
- return fnt;
- }
-
- pos += length;
- }
- return null;
-
- }
-
- /**
- * Return the underlying xml bean
- */
- @Internal
- public CTRst getCTRst() {
- return st;
- }
-
- protected void setStylesTableReference(StylesTable tbl){
- styles = tbl;
- if(st.sizeOfRArray() > 0) {
- //noinspection deprecation - for performance reasons!
- for (CTRElt r : st.getRArray()) {
- CTRPrElt pr = r.getRPr();
- if(pr != null && pr.sizeOfRFontArray() > 0){
- String fontName = pr.getRFontArray(0).getVal();
- if(fontName.startsWith("#")){
- int idx = Integer.parseInt(fontName.substring(1));
- XSSFFont font = styles.getFontAt(idx);
- pr.removeRFont(0);
- setRunAttributes(font.getCTFont(), pr);
- }
- }
- }
- }
- }
-
- /**
- *
- * CTRPrElt --> CTFont adapter
- */
- protected static CTFont toCTFont(CTRPrElt pr){
- CTFont ctFont = CTFont.Factory.newInstance();
-
- // Bug 58315: there are files where there is no pr-entry for a RichTextString
- if(pr == null) {
- return ctFont;
- }
-
- if(pr.sizeOfBArray() > 0) ctFont.addNewB().setVal(pr.getBArray(0).getVal());
- if(pr.sizeOfUArray() > 0) ctFont.addNewU().setVal(pr.getUArray(0).getVal());
- if(pr.sizeOfIArray() > 0) ctFont.addNewI().setVal(pr.getIArray(0).getVal());
- if(pr.sizeOfColorArray() > 0) {
- CTColor c1 = pr.getColorArray(0);
- CTColor c2 = ctFont.addNewColor();
- if(c1.isSetAuto()) c2.setAuto(c1.getAuto());
- if(c1.isSetIndexed()) c2.setIndexed(c1.getIndexed());
- if(c1.isSetRgb()) c2.setRgb(c1.getRgb());
- if(c1.isSetTheme()) c2.setTheme(c1.getTheme());
- if(c1.isSetTint()) c2.setTint(c1.getTint());
- }
- if(pr.sizeOfSzArray() > 0) ctFont.addNewSz().setVal(pr.getSzArray(0).getVal());
- if(pr.sizeOfRFontArray() > 0) ctFont.addNewName().setVal(pr.getRFontArray(0).getVal());
- if(pr.sizeOfFamilyArray() > 0) ctFont.addNewFamily().setVal(pr.getFamilyArray(0).getVal());
- if(pr.sizeOfSchemeArray() > 0) ctFont.addNewScheme().setVal(pr.getSchemeArray(0).getVal());
- if(pr.sizeOfCharsetArray() > 0) ctFont.addNewCharset().setVal(pr.getCharsetArray(0).getVal());
- if(pr.sizeOfCondenseArray() > 0) ctFont.addNewCondense().setVal(pr.getCondenseArray(0).getVal());
- if(pr.sizeOfExtendArray() > 0) ctFont.addNewExtend().setVal(pr.getExtendArray(0).getVal());
- if(pr.sizeOfVertAlignArray() > 0) ctFont.addNewVertAlign().setVal(pr.getVertAlignArray(0).getVal());
- if(pr.sizeOfOutlineArray() > 0) ctFont.addNewOutline().setVal(pr.getOutlineArray(0).getVal());
- if(pr.sizeOfShadowArray() > 0) ctFont.addNewShadow().setVal(pr.getShadowArray(0).getVal());
- if(pr.sizeOfStrikeArray() > 0) ctFont.addNewStrike().setVal(pr.getStrikeArray(0).getVal());
-
- return ctFont;
- }
-
- /**
- * Add the xml:spaces="preserve" attribute if the string has leading or trailing spaces
- *
- * @param xs the string to check
- */
- protected static void preserveSpaces(STXstring xs) {
- String text = xs.getStringValue();
- if (text != null && text.length() > 0) {
- char firstChar = text.charAt(0);
- char lastChar = text.charAt(text.length() - 1);
- if(Character.isWhitespace(firstChar) || Character.isWhitespace(lastChar)) {
- XmlCursor c = xs.newCursor();
- c.toNextToken();
- c.insertAttributeWithValue(new QName("http://www.w3.org/XML/1998/namespace", "space"), "preserve");
- c.dispose();
- }
- }
- }
-
- /**
- * For all characters which cannot be represented in XML as defined by the XML 1.0 specification,
- * the characters are escaped using the Unicode numerical character representation escape character
- * format _xHHHH_, where H represents a hexadecimal character in the character's value.
- * <p>
- * Example: The Unicode character 0D is invalid in an XML 1.0 document,
- * so it shall be escaped as <code>_x000D_</code>.
- * </p>
- * See section 3.18.9 in the OOXML spec.
- *
- * @param value the string to decode
- * @return the decoded string
- */
- static String utfDecode(String value){
- if(value == null || !value.contains("_x")) {
- return value;
- }
-
- StringBuilder buf = new StringBuilder();
- Matcher m = utfPtrn.matcher(value);
- int idx = 0;
- while(m.find()) {
- int pos = m.start();
- if( pos > idx) {
- buf.append(value.substring(idx, pos));
- }
-
- String code = m.group(1);
- int icode = Integer.decode("0x" + code);
- buf.append((char)icode);
-
- idx = m.end();
- }
-
- // small optimization: don't go via StringBuilder if not necessary,
- // the encodings are very rare, so we should almost always go via this shortcut.
- if(idx == 0) {
- return value;
- }
-
- buf.append(value.substring(idx));
- return buf.toString();
- }
-
- void applyFont(TreeMap<Integer, CTRPrElt> formats, int startIndex, int endIndex, CTRPrElt fmt) {
- // delete format runs that fit between startIndex and endIndex
- // runs intersecting startIndex and endIndex remain
- int runStartIdx = 0;
- for (Iterator<Integer> it = formats.keySet().iterator(); it.hasNext();) {
- int runEndIdx = it.next();
- if (runStartIdx >= startIndex && runEndIdx < endIndex) {
- it.remove();
- }
- runStartIdx = runEndIdx;
- }
-
- if(startIndex > 0 && !formats.containsKey(startIndex)) {
- // If there's a format that starts later in the string, make it start now
- for(Map.Entry<Integer, CTRPrElt> entry : formats.entrySet()) {
- if(entry.getKey() > startIndex) {
- formats.put(startIndex, entry.getValue());
- break;
- }
- }
- }
- formats.put(endIndex, fmt);
-
- // assure that the range [startIndex, endIndex] consists if a single run
- // there can be two or three runs depending whether startIndex or endIndex
- // intersected existing format runs
- SortedMap<Integer, CTRPrElt> sub = formats.subMap(startIndex, endIndex);
- while(sub.size() > 1) sub.remove(sub.lastKey());
- }
-
- TreeMap<Integer, CTRPrElt> getFormatMap(CTRst entry){
- int length = 0;
- TreeMap<Integer, CTRPrElt> formats = new TreeMap<Integer, CTRPrElt>();
- //noinspection deprecation - for performance reasons!
- for (CTRElt r : entry.getRArray()) {
- String txt = r.getT();
- CTRPrElt fmt = r.getRPr();
-
- length += txt.length();
- formats.put(length, fmt);
- }
- return formats;
- }
-
- CTRst buildCTRst(String text, TreeMap<Integer, CTRPrElt> formats){
- if(text.length() != formats.lastKey()) {
- throw new IllegalArgumentException("Text length was " + text.length() +
- " but the last format index was " + formats.lastKey());
- }
- CTRst stf = CTRst.Factory.newInstance();
- int runStartIdx = 0;
- for (Map.Entry<Integer, CTRPrElt> me : formats.entrySet()) {
- int runEndIdx = me.getKey();
- CTRElt run = stf.addNewR();
- String fragment = text.substring(runStartIdx, runEndIdx);
- run.setT(fragment);
- preserveSpaces(run.xgetT());
-
- CTRPrElt fmt = me.getValue();
- if (fmt != null) {
- run.setRPr(fmt);
- }
- runStartIdx = runEndIdx;
- }
- return stf;
- }
-
- private ThemesTable getThemesTable() {
- if(styles == null) return null;
- return styles.getTheme();
- }
- }
|