From: Nick Burch Date: Tue, 12 Aug 2008 19:57:04 +0000 (+0000) Subject: Add HWPF support for stripping out fields (eg macros), and make this optionally happe... X-Git-Tag: REL_3_2_FINAL~160 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=97e8e39eb675fe57f95416c7dfd4b22df9402ae8;p=poi.git Add HWPF support for stripping out fields (eg macros), and make this optionally happen always for headers and footers git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@685283 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java index 63c6a18ca7..deaffc79af 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java @@ -230,4 +230,12 @@ public class WordExtractor extends POIOLE2TextExtractor { return ret.toString(); } + + /** + * Removes any fields (eg macros, page markers etc) + * from the string. + */ + public static String stripFields(String text) { + return Range.stripFields(text); + } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java index 95cee57d1b..8300361720 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java @@ -35,6 +35,8 @@ public class HeaderStories { private Range headerStories; private PlexOfCps plcfHdd; + private boolean stripFields = false; + public HeaderStories(HWPFDocument doc) { this.headerStories = doc.getHeaderStoryRange(); FileInformationBlock fib = doc.getFileInformationBlock(); @@ -157,8 +159,15 @@ public class HeaderStories { return ""; } - // Return the contents - return headerStories.text().substring(prop.getStart(), prop.getEnd()); + // Grab the contents + String text = + headerStories.text().substring(prop.getStart(), prop.getEnd()); + + // Strip off fields and macros if requested + if(stripFields) { + return Range.stripFields(text); + } + return text; } public Range getRange() { @@ -167,4 +176,22 @@ public class HeaderStories { protected PlexOfCps getPlcfHdd() { return plcfHdd; } + + /** + * Are fields currently being stripped from + * the text that this {@link HeaderStories} returns? + * Default is false, but can be changed + */ + public boolean areFieldsStripped() { + return stripFields; + } + /** + * Should fields (eg macros) be stripped from + * the text that this class returns? + * Default is not to strip. + * @param stripFields + */ + public void setAreFieldsStripped(boolean stripFields) { + this.stripFields = stripFields; + } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java index 0a145d5fd4..d9b679e42d 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java @@ -299,6 +299,63 @@ public class Range } return sb.toString(); } + + /** + * Removes any fields (eg macros, page markers etc) + * from the string. + * Normally used to make some text suitable for showing + * to humans, and the resultant text should not normally + * be saved back into the document! + */ + public static String stripFields(String text) { + // First up, fields can be nested... + // A field can be 0x13 [contents] 0x15 + // Or it can be 0x13 [contents] 0x14 [real text] 0x15 + + // If there are no fields, all easy + if(text.indexOf('\u0013') == -1) return text; + + // Loop over until they're all gone + // That's when we're out of both 0x13s and 0x15s + while( text.indexOf('\u0013') > -1 && + text.indexOf('\u0015') > -1) { + int first13 = text.indexOf('\u0013'); + int next13 = text.indexOf('\u0013', first13+1); + int first14 = text.indexOf('\u0014', first13+1); + int last15 = text.lastIndexOf('\u0015'); + + // If they're the wrong way around, give up + if(last15 < first13) { + break; + } + + // If no more 13s and 14s, just zap + if(next13 == -1 && first14 == -1) { + text = text.substring(0, first13) + + text.substring(last15+1); + break; + } + + // If a 14 comes before the next 13, then + // zap from the 13 to the 14, and remove + // the 15 + if(first14 != -1 && (first14 < next13 || next13 == -1)) { + text = text.substring(0, first13) + + text.substring(first14+1, last15) + + text.substring(last15+1); + continue; + } + + // Another 13 comes before the next 14. + // This means there's nested stuff, so we + // can just zap the lot + text = text.substring(0, first13) + + text.substring(last15+1); + continue; + } + + return text; + } /** * Used to get the number of sections in a range. If this range is smaller diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/HeaderWithMacros.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/HeaderWithMacros.doc new file mode 100644 index 0000000000..934970f58b Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/HeaderWithMacros.doc differ diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHeaderStories.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHeaderStories.java index d4d2517f90..404f6e47a4 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHeaderStories.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHeaderStories.java @@ -35,6 +35,7 @@ public class TestHeaderStories extends TestCase { private HWPFDocument oddEven; private HWPFDocument diffFirst; private HWPFDocument unicode; + private HWPFDocument withFields; protected void setUp() throws Exception { String dirname = System.getProperty("HWPF.testdata.path"); @@ -60,6 +61,9 @@ public class TestHeaderStories extends TestCase { unicode = new HWPFDocument( new FileInputStream(new File(dirname, "HeaderFooterUnicode.doc")) ); + withFields = new HWPFDocument( + new FileInputStream(new File(dirname, "HeaderWithMacros.doc")) + ); } public void testNone() throws Exception { @@ -186,4 +190,15 @@ public class TestHeaderStories extends TestCase { assertEquals("\r\r", hs.getEvenFooter()); assertEquals("The footer, with Moli\u00e8re, has Unicode in it.\r\r", hs.getOddFooter()); } + + public void testWithFields() throws Exception { + HeaderStories hs = new HeaderStories(withFields); + assertFalse(hs.areFieldsStripped()); + + assertEquals("HEADER GOES HERE. 8/12/2008 \u0013 AUTHOR \\* MERGEFORMAT \u0014Eric Roch\u0015\r\r\r", hs.getOddHeader()); + + // Now turn on stripping + hs.setAreFieldsStripped(true); + assertEquals("HEADER GOES HERE. 8/12/2008 Eric Roch\r\r\r", hs.getOddHeader()); + } } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java index db28cbd456..6b4200a631 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java @@ -18,7 +18,6 @@ package org.apache.poi.hwpf.usermodel; import java.io.File; import java.io.FileInputStream; -import java.io.FileOutputStream; import junit.framework.TestCase; diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRange.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRange.java new file mode 100644 index 0000000000..bcb03996ab --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRange.java @@ -0,0 +1,53 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.poi.hwpf.usermodel; + +import junit.framework.TestCase; + +/** + * Tests for Range which aren't around deletion, insertion, + * text replacement or textual contents + */ +public class TestRange extends TestCase { + public void testFieldStripping() throws Exception { + String exp = "This is some text."; + + String single = "This is some \u0013Blah!\u0015text."; + String with14 = "This is \u0013Blah!\u0014some\u0015 text."; + String withNested = + "This is \u0013Blah!\u0013Blah!\u0015\u0015some text."; + String withNested14 = + "This is \u0013Blah!\u0013Blah!\u0014don't see me\u0015 blah!\u0015some text."; + String withNestedIn14 = + "This is \u0013Blah!\u0014some\u0013Blah!\u0015 \u0015text."; + + // Check all comes out right + assertEquals(exp, Range.stripFields(exp)); + assertEquals(exp, Range.stripFields(single)); + assertEquals(exp, Range.stripFields(with14)); + assertEquals(exp, Range.stripFields(withNested)); + assertEquals(exp, Range.stripFields(withNested14)); + assertEquals(exp, Range.stripFields(withNestedIn14)); + + // Ones that are odd and we won't change + String odd1 = "This\u0015 is \u0013 odd"; + String odd2 = "This\u0015 is \u0014 also \u0013 odd"; + + assertEquals(odd1, Range.stripFields(odd1)); + assertEquals(odd2, Range.stripFields(odd2)); + } +}