about | summary | refs | log | tree | commit | diff | stats
path: root/src/scratchpad
diff options
context:
space:
mode:
author: Tim Allison <tallison@apache.org> 2017-04-11 20:00:12 +0000
committer: Tim Allison <tallison@apache.org> 2017-04-11 20:00:12 +0000
commit: 77a0bb753fd53886cafe1e18e26b4ff554944fca (patch)
tree: 1c0ce31b9f5a0d522de506cdd90972ad65e116cc /src/scratchpad
parent: 4657756a1232aa5b71f4fb863c934468bdd2139b (diff)
download: poi-77a0bb753fd53886cafe1e18e26b4ff554944fca.tar.gz
download: poi-77a0bb753fd53886cafe1e18e26b4ff554944fca.zip
bug 50955 - add logging per Javen's recommendation. Thank you!
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1791021 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad')
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java 43
1 file changed, 28 insertions, 15 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
index 3705772887..cfc0102062 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
@@ -44,6 +44,8 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.NotImplemented;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
import org.apache.poi.util.StringUtil;
/**
@@ -52,6 +54,9 @@ import org.apache.poi.util.StringUtil;
*/
public class HWPFOldDocument extends HWPFDocumentCore {
+ private static final POILogger logger = POILogFactory
+ .getLogger( HWPFOldDocument.class );
+
private final static Charset DEFAULT_CHARSET = StringUtil.WIN_1252;
private OldTextPieceTable tpt;
@@ -110,6 +115,7 @@ public class HWPFOldDocument extends HWPFDocumentCore {
//if there was a problem with the guessed charset and the length of the
//textpiece, back off to win1252. This is effectively what we used to do.
tp = buildTextPiece(StringUtil.WIN_1252);
+ logger.log(POILogger.WARN, "Error with "+guessedCharset +". Backing off to Windows-1252");
}
tpt.add(tp);
@@ -181,9 +187,9 @@ public class HWPFOldDocument extends HWPFDocumentCore {
/**
- * Take the first codepage that is not default, ansi or symbol.
- * Ideally, we'd want to track fonts with runs, but we don't yet
- * know how to do that.
+ * Try to get the code page from various areas of the document.
+ * Start with the DocumentSummaryInformation, back off to the section info,
+ * finally try the charset information from the font table.
*
* Consider throwing an exception if > 1 unique codepage that is not default, symbol or ansi
* appears here.
@@ -198,26 +204,30 @@ public class HWPFOldDocument extends HWPFDocumentCore {
CustomProperties customProperties = summaryInformation.getCustomProperties();
if (customProperties != null) {
int codePage = customProperties.getCodepage();
- try {
- return Charset.forName(CodePageUtil.codepageToEncoding(codePage));
- } catch (UnsupportedEncodingException e) {
- //swallow
+ if (codePage > -1) {
+ try {
+ return Charset.forName(CodePageUtil.codepageToEncoding(codePage));
+ } catch (UnsupportedEncodingException e) {
+ //swallow
+ }
}
}
- //for now, try to get first valid code page in a valid section
+ //If that didn't work, for now, try to get first valid code page in a valid section
for (Section section : summaryInformation.getSections()) {
if (section.getOffset() < 0) {
continue;
}
int codePage = section.getCodepage();
- try {
- return Charset.forName(CodePageUtil.codepageToEncoding(codePage));
- } catch (UnsupportedEncodingException e) {
- //swallow
+ if (codePage > -1) {
+ try {
+ return Charset.forName(CodePageUtil.codepageToEncoding(codePage));
+ } catch (UnsupportedEncodingException e) {
+ //swallow
+ }
}
}
}
- //if that still doesn't work, pick the first non-default non symbol charset
+ //if that still doesn't work, pick the first non-default, non-symbol charset
for (OldFfn oldFfn : fontTable.getFontNames()) {
HwmfFont.WmfCharset wmfCharset = HwmfFont.WmfCharset.valueOf(oldFfn.getChs()& 0xff);
if (wmfCharset != null &&
@@ -227,6 +237,8 @@ public class HWPFOldDocument extends HWPFDocumentCore {
return wmfCharset.getCharset();
}
}
+ logger.log(POILogger.WARN, "Couldn't find a defined charset; backing off to cp1252");
+ //if all else fails
return DEFAULT_CHARSET;
}
@@ -282,8 +294,9 @@ public class HWPFOldDocument extends HWPFDocumentCore {
}
/**
- * As a rough heuristic (total hack), read through the font table
- * and take the first non-default, non-ansi, non-symbol
+ * As a rough heuristic (total hack), read through the HPSF,
+ * then read through the font table, and take the first
+ * non-default, non-ansi, non-symbol
* font's charset and return that.
*
* Once we figure out how to link a font to a text piece, we should