Przeglądaj źródła

Bug 64244: Take the replacement of RichText strings into account when computing length of strings

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1875571 13f79535-47bb-0310-9956-ffa450edef68
tags/before_ooxml_3rd_edition
Dominik Stadler 4 lat temu
rodzic
commit
9649ca955a

+ 35
- 9
src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java Wyświetl plik

@@ -201,7 +201,7 @@ public class XSSFRichTextString implements RichTextString {
CTRElt lt = st.addNewR();
lt.setT(text);
preserveSpaces(lt.xgetT());
if (font != null) {
CTRPrElt pr = lt.addNewRPr();
setRunAttributes(font.getCTFont(), pr);
@@ -247,7 +247,7 @@ public class XSSFRichTextString implements RichTextString {
}

/**
* Does this string have any explicit formatting applied, or is
* Does this string have any explicit formatting applied, or is
* it just text in the default style?
*/
public boolean hasFormatting() {
@@ -495,6 +495,32 @@ public class XSSFRichTextString implements RichTextString {
}
}

/**
 * Computes the length a string will have once every _xHHHH_
 * escape sequence in it has been replaced, as is done for
 * rich-text strings, without building the decoded string.
 *
 * @param value The string to measure, may be null
 * @return The length of the string after replacement, 0 if the string is null.
 */
static int utfLength(String value) {
    if (value == null) {
        return 0;
    }

    final int rawLength = value.length();

    // Fast path: no "_x" means there cannot be any escape sequence,
    // so the regular expression does not need to run at all.
    if (value.indexOf("_x") < 0) {
        return rawLength;
    }

    // Every escape sequence is 7 characters long (_xHHHH_) and is replaced
    // by a single character, so each match shortens the result by 6.
    int decodedLength = rawLength;
    for (Matcher escapes = utfPtrn.matcher(value); escapes.find(); ) {
        decodedLength -= 6;
    }
    return decodedLength;
}

/**
* For all characters which cannot be represented in XML as defined by the XML 1.0 specification,
* the characters are escaped using the Unicode numerical character representation escape character
@@ -512,7 +538,7 @@ public class XSSFRichTextString implements RichTextString {
if(value == null || !value.contains("_x")) {
return value;
}
StringBuilder buf = new StringBuilder();
Matcher m = utfPtrn.matcher(value);
int idx = 0;
@@ -528,13 +554,13 @@ public class XSSFRichTextString implements RichTextString {

idx = m.end();
}
// small optimization: don't go via StringBuilder if not necessary,
// the encodings are very rare, so we should almost always go via this shortcut.
// small optimization: don't go via StringBuilder if not necessary,
// the encodings are very rare, so we should almost always go via this shortcut.
if(idx == 0) {
return value;
}
buf.append(value.substring(idx));
return buf.toString();
}
@@ -577,7 +603,7 @@ public class XSSFRichTextString implements RichTextString {
String txt = r.getT();
CTRPrElt fmt = r.getRPr();

length += txt.length();
length += utfLength(txt);
formats.put(length, fmt);
}
return formats;
@@ -605,7 +631,7 @@ public class XSSFRichTextString implements RichTextString {
}
return stf;
}
private ThemesTable getThemesTable() {
if(styles == null) return null;
return styles.getTheme();

+ 25
- 0
src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java Wyświetl plik

@@ -563,4 +563,29 @@ public final class TestXSSFRichTextString {
assertEquals("Rich Text\r\nTest", wb.getSheetAt(0).getRow(1).getCell(1).getRichStringCellValue().getString());
wb.close();
}

@Test
public void testUtfDecode_withApplyFont() {
    // Raw string content that carries two _x000D_ escape sequences.
    CTRst rawString = CTRst.Factory.newInstance();
    rawString.setT("abc_x000D_2ef_x000D_");

    // A bold, 14pt font to apply over the whole string.
    XSSFFont boldFont = new XSSFFont();
    boldFont.setBold(true);
    boldFont.setFontHeightInPoints((short) 14);

    XSSFRichTextString richText = new XSSFRichTextString(rawString);
    richText.applyFont(boldFont);

    // After the font has been applied, the text must come back with
    // each escape sequence turned into a carriage return.
    assertEquals("abc\r2ef\r", richText.getString());
}

@Test
public void testUtfLength() {
    // null and the empty string both report a length of zero
    for (String blank : new String[] { null, "" }) {
        assertEquals(0, XSSFRichTextString.utfLength(blank));
    }

    // Each of these is expected to report a length of three: plain text,
    // and text with _xHHHH_ escape sequences at the end, middle and start,
    // where every escape sequence counts as a single character.
    String[] threeCharacterInputs = {
        "abc",
        "ab_x0032_",
        "a_x0032__x0032_",
        "_x0032_a_x0032_",
        "_x0032__x0032_a",
    };
    for (String input : threeCharacterInputs) {
        assertEquals(3, XSSFRichTextString.utfLength(input));
    }
}
}

Ładowanie…
Anuluj
Zapisz