瀏覽代碼

51519 -- allow users to ignore or include the <rPh> (phonetic run) element in the ReadOnlySharedStringsTable used in the SAX/streaming xlsx reader.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1785965 13f79535-47bb-0310-9956-ffa450edef68
tags/REL_3_16_FINAL
Tim Allison 7 年之前
父節點
當前提交
396cd26693

+ 47
- 9
src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java 查看文件



import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML; import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML;


import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream; import java.io.PushbackInputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List; import java.util.List;

import javax.xml.parsers.ParserConfigurationException;
import java.util.Map;


import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackagePart;
*/ */
private List<String> strings; private List<String> strings;


/**
* Map of phonetic strings (if they exist) indexed
* with the integer matching the index in strings
*/
private Map<Integer, String> phoneticStrings;

/** /**
* @param pkg The {@link OPCPackage} to use as basis for the shared-strings table. * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
* @throws IOException If reading the data from the package fails. * @throws IOException If reading the data from the package fails.
return strings.get(idx); return strings.get(idx);
} }


/**
 * Looks up the phonetic string recorded for a shared-string index.
 *
 * @param idx index to look up in the phonetic string map
 * @return the phonetic string stored for {@code idx}, or <code>null</code>
 *         when nothing is stored for that index or the phonetic map has
 *         not been created yet
 */
public String getPhoneticStringAt(int idx) {
    // The map is only instantiated once the parser has reached <sst/>,
    // so a lookup made before that point must not dereference it.
    return (phoneticStrings == null) ? null : phoneticStrings.get(idx);
}

public List<String> getItems() { public List<String> getItems() {
return strings; return strings;
} }
//// ContentHandler methods //// //// ContentHandler methods ////


private StringBuffer characters; private StringBuffer characters;
private StringBuffer rphCharacters;
private boolean tIsOpen; private boolean tIsOpen;
private boolean inRPh;


public void startElement(String uri, String localName, String name, public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException { Attributes attributes) throws SAXException {
if (uri != null && ! uri.equals(NS_SPREADSHEETML)) { if (uri != null && ! uri.equals(NS_SPREADSHEETML)) {
return; return;
} }
if ("sst".equals(localName)) { if ("sst".equals(localName)) {
String count = attributes.getValue("count"); String count = attributes.getValue("count");
if(count != null) this.count = Integer.parseInt(count); if(count != null) this.count = Integer.parseInt(count);
if(uniqueCount != null) this.uniqueCount = Integer.parseInt(uniqueCount); if(uniqueCount != null) this.uniqueCount = Integer.parseInt(uniqueCount);


this.strings = new ArrayList<String>(this.uniqueCount); this.strings = new ArrayList<String>(this.uniqueCount);
this.phoneticStrings = new HashMap<Integer, String>();
characters = new StringBuffer(); characters = new StringBuffer();
rphCharacters = new StringBuffer();
} else if ("si".equals(localName)) { } else if ("si".equals(localName)) {
characters.setLength(0); characters.setLength(0);
} else if ("t".equals(localName)) { } else if ("t".equals(localName)) {
tIsOpen = true; tIsOpen = true;
} else if ("rPh".equals(localName)) {
inRPh = true;
} }
} }


if (uri != null && ! uri.equals(NS_SPREADSHEETML)) { if (uri != null && ! uri.equals(NS_SPREADSHEETML)) {
return; return;
} }
if ("si".equals(localName)) { if ("si".equals(localName)) {
strings.add(characters.toString()); strings.add(characters.toString());
if (rphCharacters.length() > 0) {
phoneticStrings.put(strings.size()-1, rphCharacters.toString());
rphCharacters.setLength(0);
}
} else if ("t".equals(localName)) { } else if ("t".equals(localName)) {
tIsOpen = false;
tIsOpen = false;
} else if ("rPh".equals(localName)) {
inRPh = false;
} }
} }


*/ */
public void characters(char[] ch, int start, int length) public void characters(char[] ch, int start, int length)
throws SAXException { throws SAXException {
if (tIsOpen)
characters.append(ch, start, length);
if (tIsOpen) {
if (inRPh) {
rphCharacters.append(ch, start, length);
} else {
characters.append(ch, start, length);
}
}
} }

} }

+ 20
- 6
src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java 查看文件



package org.apache.poi.xssf.eventusermodel; package org.apache.poi.xssf.eventusermodel;


import junit.framework.TestCase;
import java.io.IOException;
import java.util.List;
import java.util.regex.Pattern;


import junit.framework.TestCase;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackagePart;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst; import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;


import java.io.IOException;
import java.util.List;
import java.util.regex.Pattern;

/** /**
* Tests for {@link org.apache.poi.xssf.eventusermodel.XSSFReader} * Tests for {@link org.apache.poi.xssf.eventusermodel.XSSFReader}
*/ */
} }


} }

/**
 * Reads the shared strings part of 51519.xlsx and checks that the main text
 * and the phonetic run text are exposed separately: entry 0 has no phonetic
 * string, entry 3 has one.
 *
 * @throws Exception if the package cannot be opened or parsed
 */
public void testPhoneticRuns() throws Exception {
    OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsx"));
    try {
        List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.xml"));
        assertEquals(1, parts.size());

        ReadOnlySharedStringsTable rtbl = new ReadOnlySharedStringsTable(parts.get(0));
        List<String> strings = rtbl.getItems();
        assertEquals(49, strings.size());

        assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0));
        assertNull(rtbl.getPhoneticStringAt(0));
        assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3));
        assertEquals("\u30CB\u30DB\u30F3", rtbl.getPhoneticStringAt(3));
    } finally {
        // Release the package even when an assertion fails; the test only
        // reads, so discard rather than save (revert() closes without saving).
        pkg.revert();
    }
}

public void testEmptySSTOnPackageObtainedViaWorkbook() throws Exception { public void testEmptySSTOnPackageObtainedViaWorkbook() throws Exception {
XSSFWorkbook wb = new XSSFWorkbook(_ssTests.openResourceAsStream("noSharedStringTable.xlsx")); XSSFWorkbook wb = new XSSFWorkbook(_ssTests.openResourceAsStream("noSharedStringTable.xlsx"));
OPCPackage pkg = wb.getPackage(); OPCPackage pkg = wb.getPackage();

+ 14
- 1
src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java 查看文件

import java.util.regex.Pattern; import java.util.regex.Pattern;


import junit.framework.TestCase; import junit.framework.TestCase;

import org.apache.poi.POITextExtractor; import org.apache.poi.POITextExtractor;
import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.hssf.extractor.ExcelExtractor;
extractor.close(); extractor.close();
} }
} }

/**
 * Extracts the text of 51519.xlsx and checks that regular cell text is
 * present while text that exists only as a phonetic run is absent.
 *
 * @throws Exception if the extractor cannot be created or closed
 */
public void testPhoneticRuns() throws Exception {
    final String regularText = "\u8C4A\u7530";
    // occurs in the workbook only inside a phonetic run
    final String phoneticOnlyText = "\u30CB\u30DB\u30F3";

    XSSFExcelExtractor extractor = getExtractor("51519.xlsx");
    try {
        String extracted = extractor.getText();
        assertTrue(extracted.contains(regularText));
        // phonetic-only content must not leak into the extracted text
        assertFalse(extracted.contains(phoneticOnlyText));
    } finally {
        extractor.close();
    }
}
} }

二進制
test-data/spreadsheet/51519.xlsx 查看文件


Loading…
取消
儲存