浏览代码

Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646311,646313-646404,646406-646665,646667-646853,646855-646869,646871-647151,647153-647185,647187-647277,647279-647566,647568-647573,647575,647578-647711,647714-647737,647739-647823,647825-648155,648157-648202,648204-648273,648275,648277-648302,648304-648333,648335-648588,648590-648622,648625-648673,648675-649141,649144,649146-649556,649558-649795,649799,649801-649910,649912-649913,649915-650128,650131-650132,650134-650137,650140-650914,650916-651991,651993-652284,652286-652287,652289,652291,652293-652297,652299-652328,652330-652425,652427-652445,652447-652560,652562-652933,652935,652937-652993,652995-653116,653118-653124,653126-653483,653487-653519,653522-653550,653552-653607,653609-653667,653669-653674,653676-653814,653817-653830,653832-653891,653893-653944,653946-654055,654057-654355,654357-654365,654367-654648,654651-655215,655217-655277,655279-655281,655283-655911,655913-656212,656214,656216-656251,656253-656698,656700-656756,656758-656892,656894-657135,657137-657165,657168-657179,657181-657354,657356-657357,657359-657701,657703-657874,657876-658032,658034-658284,658286,658288-658301,658303-658307,658309-658321,658323-658335,658337-658348,658351,658353-658832,658834-658983,658985,658987-659066,659068-659402,659404-659428,659430-659451,659453-659454,659456-659461,659463-659477,659479-659524,659526-659571,659574,659576-66
0255,660257-660262,660264-660279,660281-660343,660345-660473,660475-660827,660829-660833,660835-660888,660890-663321,663323-663435,663437-663764,663766-663854,663856-664219,664221-664489,664494-664514,664516-668013,668015-668142,668144-668152,668154,668156-668256,668258,668260-669139,669141-669455,669457-669657,669659-669808,669810-670189,670191-671321,671323-672229,672231-672549,672551-672552,672554-672561,672563-672566,672568,672571-673049,673051-673852,673854-673862,673864-673986,673988-673996,673998-674347,674349-674890,674892-674910,674912-674936,674938-674952,674954-675078,675080-675085,675087-675217,675219-675660,675662-675670,675672-675716,675718-675726,675728-675733,675735-675775,675777-675782,675784,675786-675791,675794-675852,675854-676200,676202,676204,676206-676220,676222-676309,676311-676456,676458-676994,676996-677027,677030-677040,677042-677056,677058-677375,677377-677968,677970-677971,677973,677975-677994,677996-678286,678288-678538,678540-680393,680395-680469,680471-680529,680531-680852,680854-681529,681531-681571,681573-682224,682226,682228,682231-682281,682283-682335,682337-682507,682509,682512-682517,682519-682532,682534-682619,682622-682777,682779-682998,683000-683019,683021-683022,683024-683080,683082-683092,683094-683095,683097-683127,683129-683131,683133-683166,683168-683698,683700-683705,683707-683757,683759-683787,683789-683870,683872-683879,683881-683900,683902-684066,684068-684074,684076-684222,684224-684254,684257-684281,684283-684286,684288-684292,684294-684298,684300-684301,684303-684308,684310-684317,684320,684323-684335,684337-684348,684350-684354,684356-684361,684363-684369,684371-684453,684455-684883,684885-684937,684940-684958,684960-684970,684972-684985,684987-685053,685055-685063,685065-685259,685261-685262,685264-685266,685268-685282,685285-686035,686037-686045,686047-686052,686054-687331 via svnmerge from

https://svn.apache.org/repos/asf/poi/trunk

........
  r686207 | nick | 2008-08-15 13:43:02 +0100 (Fri, 15 Aug 2008) | 1 line
  
  Add sample publisher files from bug #45602 to svn
........
  r686216 | nick | 2008-08-15 15:05:30 +0100 (Fri, 15 Aug 2008) | 1 line
  
  Add a few more source package excludes
........
  r686278 | nick | 2008-08-15 17:57:30 +0100 (Fri, 15 Aug 2008) | 1 line
  
  More sample hpbf docs, with a description
........
  r686290 | nick | 2008-08-15 18:42:25 +0100 (Fri, 15 Aug 2008) | 1 line
  
  Start on a HPBF dumper
........
  r686621 | nick | 2008-08-17 17:36:40 +0100 (Sun, 17 Aug 2008) | 1 line
  
  Few little tweaks to dev helpers
........
  r686624 | nick | 2008-08-17 18:39:10 +0100 (Sun, 17 Aug 2008) | 1 line
  
  More work understanding hpbf
........
  r686625 | nick | 2008-08-17 19:02:31 +0100 (Sun, 17 Aug 2008) | 1 line
  
  More work understanding hpbf
........
  r686628 | nick | 2008-08-17 19:21:34 +0100 (Sun, 17 Aug 2008) | 1 line
  
  More work understanding hpbf
........
  r686640 | nick | 2008-08-17 21:15:51 +0100 (Sun, 17 Aug 2008) | 1 line
  
  Further HPBF documentation, and some more sample files used
........
  r686844 | yegor | 2008-08-18 19:33:58 +0100 (Mon, 18 Aug 2008) | 1 line
  
  fixed bug #45645: Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE
........
  r686977 | josh | 2008-08-19 08:44:57 +0100 (Tue, 19 Aug 2008) | 1 line
  
  Fix for bug 45640 - avoid creating multiple GUTS records
........


git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@687333 13f79535-47bb-0310-9956-ffa450edef68
tags/REL_3_5_BETA3
Nick Burch 16 年前
父节点
当前提交
48ef5fb2d3
共有 24 个文件被更改,包括 800 次插入59 次删除
  1. 9
    0
      build.xml
  2. 1
    0
      src/documentation/content/xdocs/book.xml
  3. 1
    0
      src/documentation/content/xdocs/changes.xml
  4. 170
    0
      src/documentation/content/xdocs/hpbf/file-format.xml
  5. 53
    0
      src/documentation/content/xdocs/hpbf/index.xml
  6. 15
    11
      src/documentation/content/xdocs/index.xml
  7. 1
    0
      src/documentation/content/xdocs/status.xml
  8. 20
    37
      src/java/org/apache/poi/hssf/model/Sheet.java
  9. 2
    1
      src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java
  10. 25
    8
      src/java/org/apache/poi/poifs/dev/POIFSLister.java
  11. 353
    0
      src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
  12. 二进制
      src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.pub
  13. 29
    0
      src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.txt
  14. 二进制
      src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub
  15. 34
    0
      src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt
  16. 二进制
      src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2000.pub
  17. 二进制
      src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub
  18. 29
    0
      src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt
  19. 二进制
      src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub
  20. 29
    0
      src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt
  21. 二进制
      src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample98.pub
  22. 二进制
      src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleBrochure.pub
  23. 二进制
      src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleNewsletter.pub
  24. 29
    2
      src/testcases/org/apache/poi/hssf/model/TestSheet.java

+ 9
- 0
build.xml 查看文件

@@ -649,6 +649,7 @@ under the License.
<sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
<sysproperty key="HPBF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hpbf/data"/>
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="OOXML.testdata.path" file="${ooxml.src.test}/org/apache/poi/ooxml/data"/>
<sysproperty key="java.awt.headless" value="true"/>
@@ -707,6 +708,7 @@ under the License.
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
<sysproperty key="HPBF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hpbf/data"/>
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain"/>
@@ -742,6 +744,7 @@ under the License.
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
<sysproperty key="HPBF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hpbf/data"/>
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<sysproperty key="java.awt.headless" value="true"/>
@@ -1284,10 +1287,13 @@ FORREST_HOME environment variable!</echo>
<zipfileset dir="." prefix="${zipdir}">
<exclude name="build/**"/>
<exclude name="scripts/**"/>
<exclude name="TEST*"/>
<exclude name="*.ipr"/>
<exclude name="*.iml"/>
<exclude name="*.iws"/>
<exclude name="*.swp"/>
<exclude name=".classpath"/>
<exclude name=".project"/>
</zipfileset>
</zip>

@@ -1313,10 +1319,13 @@ FORREST_HOME environment variable!</echo>
<tarfileset dir="." prefix="${zipdir}">
<exclude name="build/**"/>
<exclude name="scripts/**"/>
<exclude name="TEST*"/>
<exclude name="*.ipr"/>
<exclude name="*.iml"/>
<exclude name="*.iws"/>
<exclude name="*.swp"/>
<exclude name=".classpath"/>
<exclude name=".project"/>
</tarfileset>
</tar>


+ 1
- 0
src/documentation/content/xdocs/book.xml 查看文件

@@ -41,6 +41,7 @@
<menu-item label="HSLF" href="slideshow/index.html"/>
<menu-item label="HSMF" href="hsmf/index.html"/>
<menu-item label="HDGF" href="hdgf/index.html"/>
<menu-item label="HPBF" href="hpbf/index.html"/>
<menu-item label="POI-Ruby" href="poi-ruby.html"/>
<menu-item label="POI-Utils" href="utils/index.html"/>
<menu-item label="Text Extraction" href="text-extraction.html"/>

+ 1
- 0
src/documentation/content/xdocs/changes.xml 查看文件

@@ -64,6 +64,7 @@
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
</release>
<release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">45645 - Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE</action>
<action dev="POI-DEVELOPERS" type="add">45623 - Support for additional HSSF header and footer fields, including bold and full file path</action>
<action dev="POI-DEVELOPERS" type="add">45623 - Support stripping HSSF header and footer fields (eg page number) out of header and footer text if required</action>
<action dev="POI-DEVELOPERS" type="add">45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text)</action>

+ 170
- 0
src/documentation/content/xdocs/hpbf/file-format.xml 查看文件

@@ -0,0 +1,170 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
====================================================================
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "../dtd/document-v11.dtd">

<document>
<header>
<title>POI-HPBF - A Guide to the Publisher File Format</title>
<subtitle>Overview</subtitle>
<authors>
<person name="Nick Burch" email="nick at torchbox dot com"/>
</authors>
</header>

<body>
<section><title>Document Streams</title>
<p>
The file is made up of a number of POIFS streams. A typical
file will be made up as follows:
</p>
<source>
Root Entry -
Objects -
(no children)
SummaryInformation &lt;(0x05)SummaryInformation&gt;
DocumentSummaryInformation &lt;(0x05)DocumentSummaryInformation&gt;
Escher -
EscherStm
EscherDelayStm
Quill -
QuillSub -
CONTENTS
CompObj &lt;(0x01)CompObj&gt;
Envelope
Contents
Internal &lt;(0x03)Internal&gt;
CompObj &lt;(0x01)CompObj&gt;
VBA -
(no children)
</source>
</section>
<section><title>Changing Text</title>
<p>If you make a change to the text of a file, but do not change
how much text there is, then the <em>CONTENTS</em> stream
will undergo a small change, and the <em>Contents</em> stream
will undergo a large change.</p>
<p>If you make a change to the text of a file, and change the
amount of text there is, then both the <em>Contents</em> and
the <em>CONTENTS</em> streams change.</p>
</section>
<section><title>Changing Shapes</title>
<p>If you alter the size of a textbox, but make no text changes,
then both <em>Contents</em> and <em>CONTENTS</em> streams
change. There are no changes to the Escher streams.</p>
<p>If you set the background colour of a textbox, but make
no changes to the text, (to finish off)</p>
</section>
<section><title>Structure of CONTENTS</title>
<p>First we have "CHNKINK ", followed by 24 bytes.</p>
<p>Next we have 20 sequences of 24 bytes each. If the first two bytes
are 0x1800, then that sequence entry exists, but if it's 0x0000 then
the entry doesn't exist. If it does exist, we then have 4 bytes of
upper case ASCII text, followed by three little endian shorts.
The first of these seems to be the count of that type, the second is
usually 1, the third is usually zero. Then we have another 4 bytes of
upper case ASCII text, normally but not always the same as the first
text. Finally, we have an unsigned little endian 32 bit offset to
the start of the data for this, then an unsigned little endian
32 bit length of this section.</p>
<p>Normally, the first sequence entry is for TEXT, and the text data
will start at 0x200. After that is normally two or three STSH entries
(so the first short has values 0, then 1, then 2). After that it
seems to vary.</p>
<p>At 0x200 we have the text, stored as little endian 16 bit unicode.</p>
<p>After the text comes all sorts of other stuff, presumably as
described by the sequences.</p>
<p>For a contents stream of length 7168 / 0x1c00 bytes, the start
looks something like:</p>
<source>
CHNKINK // "CHNKINK "
04 00 07 00 // Normally 04 00 07 00
13 00 00 03 // Normally ## 00 00 03
00 02 00 00 // Normally 00 ## 00 00
00 1c 00 00 // Normally length of the stream
f8 01 13 00 // Normally f8 01 11/13 00
ff ff ff ff // Normally seems to be ffffffff

18 00
TEXT 00 00 01 00 00 00 // TEXT 0 1 0
TEXT 00 02 00 00 d0 03 00 00 // TEXT from: 200 (512), len: 3d0 (976)
18 00
STSH 00 00 01 00 00 00 // STSH 0 1 0
STSH d0 05 00 00 1e 00 00 00 // STSH from: 5d0 (1488), len: 1e (30)
18 00
STSH 01 00 01 00 00 00 // STSH 1 1 0
STSH ee 05 00 00 b8 01 00 00 // STSH from: 5ee (1518), len: 1b8 (440)
18 00
STSH 02 00 01 00 00 00 // STSH 2 1 0
STSH a6 07 00 00 3c 00 00 00 // STSH from: 7a6 (1958), len: 3c (60)
18 00
FDPP 00 00 01 00 00 00 // FDPP 0 1 0
FDPP 00 08 00 00 00 02 00 00 // FDPP from: 800 (2048), len: 200 (512)
18 00
FDPC 00 00 01 00 00 00 // FDPC 0 1 0
FDPC 00 0a 00 00 00 02 00 00 // FDPC from: a00 (2560), len: 200 (512)
18 00
FDPC 01 00 01 00 00 00 // FDPC 1 1 0
FDPC 00 0c 00 00 00 02 00 00 // FDPC from: c00 (3072), len: 200 (512)
18 00
SYID 00 00 01 00 00 00 // SYID 0 1 0
SYID 00 0e 00 00 20 00 00 00 // SYID from: e00 (3584), len: 20 (32)
18 00
SGP 00 00 01 00 00 00 // SGP 0 1 0
SGP 20 0e 00 00 0a 00 00 00 // SGP from: e20 (3616), len: a (10)
18 00
INK 00 00 01 00 00 00 // INK 0 1 0
INK 2a 0e 00 00 04 00 00 00 // INK from: e2a (3626), len: 4 (4)
18 00
BTEP 00 00 01 00 00 00 // BTEP 0 1 0
PLC 2e 0e 00 00 18 00 00 00 // PLC from: e2e (3630), len: 18 (24)
18 00
BTEC 00 00 01 00 00 00 // BTEC 0 1 0
PLC 46 0e 00 00 20 00 00 00 // PLC from: e46 (3654), len: 20 (32)
18 00
FONT 00 00 01 00 00 00 // FONT 0 1 0
FONT 66 0e 00 00 48 03 00 00 // FONT from: e66 (3686), len: 348 (840)
18 00
TCD 03 00 01 00 00 00 // TCD 3 1 0
PLC ae 11 00 00 24 00 00 00 // PLC from: 11ae (4526), len: 24 (36)
18 00
TOKN 04 00 01 00 00 00 // TOKN 4 1 0
PLC d2 11 00 00 0a 01 00 00 // PLC from: 11d2 (4562), len: 10a (266)
18 00
TOKN 05 00 01 00 00 00 // TOKN 5 1 0
PLC dc 12 00 00 2a 01 00 00 // PLC from: 12dc (4828), len: 12a (298)
18 00
STRS 00 00 01 00 00 00 // STRS 0 1 0
PLC 06 14 00 00 46 00 00 00 // PLC from: 1406 (5126), len: 46 (70)
18 00
MCLD 00 00 01 00 00 00 // MCLD 0 1 0
MCLD 4c 14 00 00 16 06 00 00 // MCLD from: 144c (5196), len: 616 (1558)
18 00
PL 00 00 01 00 00 00 // PL 0 1 0
PL 62 1a 00 00 48 00 00 00 // PL from: 1a62 (6754), len: 48 (72)
00 00 // Blank entry follows
00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00

(the text will then start)
</source>
</section>
</body>
</document>

+ 53
- 0
src/documentation/content/xdocs/hpbf/index.xml 查看文件

@@ -0,0 +1,53 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
====================================================================
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "../dtd/document-v11.dtd">

<document>
<header>
<title>POI-HPBF - Java API To Access Microsoft Publisher Format Files</title>
<subtitle>Overview</subtitle>
<authors>
<person name="Nick Burch" email="nick at apache dot org"/>
</authors>
</header>

<body>
<section>
<title>Overview</title>

<p>HPBF is the POI Project's pure Java implementation of the Publisher file format.</p>
<p>Currently, HPBF is in the experimental stage, while we try
to figure out the file format. Our initial aim is to provide
a text extractor for the format, with low level code following
after that if demand and developer interest warrant it.</p>
<p>At this time, there is no <em>usermodel</em> api or similar.</p>
<p>Our current understanding of the file format is documented
<link href="file-format.html">here</link>.</p>
<note>
This code currently lives in the
<link href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">scratchpad area</link>
of the POI SVN repository.
Ensure that you have the scratchpad jar or the scratchpad
build area in your
classpath before experimenting with this code.
</note>
</section>
</body>
</document>

+ 15
- 11
src/documentation/content/xdocs/index.xml 查看文件

@@ -141,26 +141,30 @@
href="./slideshow/index.html">the HSLF project page for more
information</link>.</p>
</section>
<section><title>HDGF for Visio Documents</title>
<p>HDGF is our port of the Microsoft Visio 97(-2003) file format to pure
Java. It currently only supports reading at a very low level, and
simple text extraction. Please see <link
href="./hdgf/index.html">the HDGF project page for more
information</link>.</p>
</section>
<section><title>HPSF for Document Properties</title>
<p>HPSF is our port of the OLE 2 property set format to pure
Java. Property sets are mostly used to store a document's properties
(title, author, date of last modification etc.), but they can be used
for application-specific purposes as well.</p>

<p>HPSF supports reading and writing of properties. However, you will
need to be using version 3.0 of POI to utilise the write support.</p>

<p>HPSF supports both reading and writing of properties.</p>
<p>Please see <link href="./hpsf/index.html">the HPSF project
page</link> for more information.</p>
</section>

<section><title>HDGF for Visio Documents</title>
<p>HDGF is our port of the Microsoft Visio 97(-2003) file format to pure
Java. It currently only supports reading at a very low level, and
simple text extraction. Please see <link
href="./hdgf/index.html">the HDGF project page for more
information</link>.</p>
</section>
<section><title>HPBF for Publisher Documents</title>
<p>HPBF is our port of the Microsoft Publisher 98(-2007) file format to pure
Java. At the moment, we are still figuring out the file format, but we hope
to have simple text extraction shortly. Please see <link
href="./hpbf/index.html">the HPBF project page for more
information</link>.</p>
</section>
</section>

<section><title>Contributing </title>

+ 1
- 0
src/documentation/content/xdocs/status.xml 查看文件

@@ -61,6 +61,7 @@
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
</release>
<release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">45645 - Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE</action>
<action dev="POI-DEVELOPERS" type="add">45623 - Support for additional HSSF header and footer fields, including bold and full file path</action>
<action dev="POI-DEVELOPERS" type="add">45623 - Support stripping HSSF header and footer fields (eg page number) out of header and footer text if required</action>
<action dev="POI-DEVELOPERS" type="add">45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text)</action>

+ 20
- 37
src/java/org/apache/poi/hssf/model/Sheet.java 查看文件

@@ -106,7 +106,7 @@ public final class Sheet implements Model {

protected ArrayList records = null;
int preoffset = 0; // offset of the sheet in a new file
protected int dimsloc = -1; // TODO - is it legal for dims record to be missing?
protected int dimsloc = -1; // TODO - remove dimsloc
protected PrintGridlinesRecord printGridlines = null;
protected GridsetRecord gridset = null;
private GutsRecord _gutsRecord;
@@ -125,7 +125,8 @@ public final class Sheet implements Model {
private MergedCellsTable _mergedCellsTable;
/** always present in this POI object, not always written to Excel file */
/*package*/ColumnInfoRecordsAggregate _columnInfos;
protected DimensionsRecord dims;
/** the DimensionsRecord is always present */
private DimensionsRecord _dimensions;
protected RowRecordsAggregate _rowsAggregate = null;
private DataValidityTable _dataValidityTable= null;
private ConditionalFormattingTable condFormatting;
@@ -287,7 +288,7 @@ public final class Sheet implements Model {
records.add(retval._columnInfos);
}

retval.dims = ( DimensionsRecord ) rec;
retval._dimensions = ( DimensionsRecord ) rec;
retval.dimsloc = records.size();
}
else if (rec.getSid() == DefaultColWidthRecord.sid)
@@ -333,7 +334,7 @@ public final class Sheet implements Model {

records.add(rec);
}
if (retval.dimsloc < 0) {
if (retval._dimensions == null) {
throw new RuntimeException("DimensionsRecord was not found");
}
retval.records = records;
@@ -404,6 +405,8 @@ public final class Sheet implements Model {

public static Sheet createSheet()
{
// TODO - convert this method to a constructor

if (log.check( POILogger.DEBUG ))
log.log(POILogger.DEBUG, "Sheet createsheet from scratch called");
Sheet retval = new Sheet();
@@ -423,7 +426,8 @@ public final class Sheet implements Model {
records.add( retval.printGridlines );
retval.gridset = createGridset();
records.add( retval.gridset );
records.add( retval.createGuts() );
retval._gutsRecord = createGuts();
records.add( retval._gutsRecord );
retval.defaultrowheight = createDefaultRowHeight();
records.add( retval.defaultrowheight );
records.add( retval.createWSBool() );
@@ -440,8 +444,8 @@ public final class Sheet implements Model {
ColumnInfoRecordsAggregate columns = new ColumnInfoRecordsAggregate();
records.add( columns );
retval._columnInfos = columns;
retval.dims = createDimensions();
records.add(retval.dims);
retval._dimensions = createDimensions();
records.add(retval._dimensions);
retval.dimsloc = records.size()-1;
records.add(retval.windowTwo = retval.createWindowTwo());
retval.selection = createSelection();
@@ -460,7 +464,7 @@ public final class Sheet implements Model {
if (_rowsAggregate == null)
{
_rowsAggregate = new RowRecordsAggregate();
records.add(getDimsLoc() + 1, _rowsAggregate);
records.add(dimsloc + 1, _rowsAggregate);
}
}
private MergedCellsTable getMergedRecords() {
@@ -556,10 +560,10 @@ public final class Sheet implements Model {
.append(lastrow).append("lastcol").append(lastcol)
.toString());
}
dims.setFirstCol(firstcol);
dims.setFirstRow(firstrow);
dims.setLastCol(lastcol);
dims.setLastRow(lastrow);
_dimensions.setFirstCol(firstcol);
_dimensions.setFirstRow(firstrow);
_dimensions.setLastCol(lastcol);
_dimensions.setLastRow(lastrow);
if (log.check( POILogger.DEBUG ))
log.log(POILogger.DEBUG, "Sheet.setDimensions exiting");
}
@@ -696,7 +700,7 @@ public final class Sheet implements Model {
if(log.check(POILogger.DEBUG)) {
log.log(POILogger.DEBUG, "add value record row" + row);
}
DimensionsRecord d = ( DimensionsRecord ) records.get(getDimsLoc());
DimensionsRecord d = _dimensions;

if (col.getColumn() > d.getLastCol())
{
@@ -720,8 +724,8 @@ public final class Sheet implements Model {
*/
public void removeValueRecord(int row, CellValueRecordInterface col) {

log.logFormatted(POILogger.DEBUG, "remove value record row,dimsloc %,%",
new int[]{row, dimsloc} );
log.logFormatted(POILogger.DEBUG, "remove value record row %",
new int[]{row } );
_rowsAggregate.removeCell(col);
}

@@ -766,7 +770,7 @@ public final class Sheet implements Model {
checkRows();
if (log.check( POILogger.DEBUG ))
log.log(POILogger.DEBUG, "addRow ");
DimensionsRecord d = ( DimensionsRecord ) records.get(getDimsLoc());
DimensionsRecord d = _dimensions;

if (row.getRowNumber() >= d.getLastRow())
{
@@ -1330,27 +1334,6 @@ public final class Sheet implements Model {
}
}

/**
* get the location of the DimensionsRecord (which is the last record before the value section)
* @return location in the array of records of the DimensionsRecord
*/

public int getDimsLoc()
{
if (log.check( POILogger.DEBUG ))
log.log(POILogger.DEBUG, "getDimsLoc dimsloc= " + dimsloc);
return dimsloc;
}

/**
* in the event the record is a dimensions record, resets both the loc index and dimsloc index
*/
public void checkDimsLoc(Record rec, int recloc) {
if (rec.getSid() == DimensionsRecord.sid) {
dimsloc = recloc;
}
}

/**
* @return the serialized size of this sheet
*/

+ 2
- 1
src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java 查看文件

@@ -1806,10 +1806,11 @@ public class HSSFSheet implements org.apache.poi.ss.usermodel.Sheet

}
if (width != -1) {
width *= 256;
if (width > Short.MAX_VALUE) { //width can be bigger that Short.MAX_VALUE!
width = Short.MAX_VALUE;
}
sheet.setColumnWidth(column, (short) (width * 256));
sheet.setColumnWidth(column, (short) (width));
}
}


+ 25
- 8
src/java/org/apache/poi/poifs/dev/POIFSLister.java 查看文件

@@ -45,37 +45,54 @@ public class POIFSLister {
System.exit(1);
}

for (int j = 0; j < args.length; j++)
{
viewFile(args[ j ]);
boolean withSizes = false;
for (int j = 0; j < args.length; j++) {
if(args[j].equalsIgnoreCase("-size") ||
args[j].equalsIgnoreCase("-sizes")) {
withSizes = true;
} else {
viewFile(args[j], withSizes);
}
}
}

public static void viewFile(final String filename) throws IOException
public static void viewFile(final String filename, boolean withSizes) throws IOException
{
POIFSFileSystem fs = new POIFSFileSystem(
new FileInputStream(filename)
);
displayDirectory(fs.getRoot(), "");
displayDirectory(fs.getRoot(), "", withSizes);
}
public static void displayDirectory(DirectoryNode dir, String indent) {
public static void displayDirectory(DirectoryNode dir, String indent, boolean withSizes) {
System.out.println(indent + dir.getName() + " -");
String newIndent = indent + " ";
boolean hadChildren = false;
for(Iterator it = dir.getEntries(); it.hasNext(); ) {
hadChildren = true;
Object entry = it.next();
if(entry instanceof DirectoryNode) {
displayDirectory((DirectoryNode)entry, newIndent);
displayDirectory((DirectoryNode)entry, newIndent, withSizes);
} else {
DocumentNode doc = (DocumentNode)entry;
String name = doc.getName();
String size = "";
if(name.charAt(0) < 10) {
String altname = "(0x0" + (int)name.charAt(0) + ")" + name.substring(1);
name = name.substring(1) + " <" + altname + ">";
}
System.out.println(newIndent + name);
if(withSizes) {
size = " [" +
doc.getSize() + " / 0x" +
Integer.toHexString(doc.getSize()) +
"]";
}
System.out.println(newIndent + name + size);
}
}
if(!hadChildren) {
System.out.println(newIndent + "(no children)");
}
}
}

+ 353
- 0
src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java 查看文件

@@ -0,0 +1,353 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hpbf.dev;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.poi.ddf.DefaultEscherRecordFactory;
import org.apache.poi.ddf.EscherRecord;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.StringUtil;

/**
* For dumping out the contents of HPBF (Publisher)
* files, while we try to figure out how they're
* constructed.
*/
public class HPBFDumper {
private POIFSFileSystem fs;
/**
 * Creates a dumper over an already opened POIFS filesystem.
 *
 * @param fs the filesystem whose streams the dump methods will read
 */
public HPBFDumper(POIFSFileSystem fs) {
this.fs = fs;
}
/**
 * Creates a dumper by wrapping the given stream in a new
 * POIFSFileSystem and delegating to the other constructor.
 *
 * @param inp stream the POIFSFileSystem is built from
 * @throws IOException if building the POIFSFileSystem from the stream fails
 */
public HPBFDumper(InputStream inp) throws IOException {
this(new POIFSFileSystem(inp));
}
/**
 * Reads the whole of the named document stream from the given directory.
 *
 * @param dir directory node that holds the document
 * @param name name of the document entry to read
 * @return a newly allocated array, sized from the entry's reported size,
 *  holding the bytes of the stream
 * @throws IOException if the entry cannot be opened or read, or if the
 *  stream ends before the reported number of bytes has been read
 */
private static byte[] getData(DirectoryNode dir, String name) throws IOException {
    DocumentEntry docProps = (DocumentEntry) dir.getEntry(name);

    // Size the buffer from the entry, then fill it completely
    byte[] d = new byte[docProps.getSize()];
    InputStream in = dir.createDocumentInputStream(name);
    try {
        // A single read() call is allowed to return fewer bytes than
        // requested, so keep reading until the buffer is full
        int filled = 0;
        while (filled < d.length) {
            int count = in.read(d, filled, d.length - filled);
            if (count < 0) {
                throw new IOException(
                    "Stream '" + name + "' ended after " + filled +
                    " bytes, expected " + d.length
                );
            }
            filled += count;
        }
    } finally {
        // Always release the stream, even if reading failed
        in.close();
    }
    return d;
}
/**
 * Renders a run of bytes as lower case hex. Each byte is treated
 * as unsigned, written as exactly two hex digits, and followed by
 * a single space (so the result ends with a space when len &gt; 0).
 *
 * @param data array to read the bytes from
 * @param offset index of the first byte to render
 * @param len how many bytes to render
 * @return the hex text, empty if len is zero or negative
 */
private String dumpBytes(byte[] data, int offset, int len) {
    StringBuffer hex = new StringBuffer();
    int pos = offset;
    int remaining = len;
    while (remaining > 0) {
        // Mask to get the unsigned value of the byte
        int unsigned = data[pos] & 0xFF;
        if (unsigned < 0x10) {
            // Single digit values need a leading zero
            hex.append('0');
        }
        hex.append(Integer.toHexString(unsigned)).append(' ');
        pos++;
        remaining--;
    }
    return hex.toString();
}
/**
 * Command line entry point: dumps the parts of the Publisher file
 *  named by the first argument to standard out.
 *
 * @param args the file to dump is expected in args[0]
 * @throws Exception if the file can't be opened or dumped
 */
public static void main(String[] args) throws Exception {
    if(args.length < 1) {
        System.err.println("Use:");
        System.err.println(" HPBFDumper <filename>");
        System.exit(1);
    }

    // Close the file once the filesystem has been built from it, rather
    //  than leaking the handle. NOTE(review): this relies on
    //  POIFSFileSystem(InputStream) reading everything it needs in its
    //  constructor - confirm against the POIFS documentation.
    HPBFDumper dump;
    FileInputStream in = new FileInputStream(args[0]);
    try {
        dump = new HPBFDumper(in);
    } finally {
        in.close();
    }

    System.out.println("Dumping " + args[0]);
    dump.dumpContents();
    dump.dumpEnvelope();
    dump.dumpEscher();
    dump.dump001CompObj(dump.fs.getRoot());
    dump.dumpQuill();
    // Still to go:
    // (0x03)Internal
    // Objects
}
/**
 * Dumps the escher streams of the file, which live in the
 *  "Escher" directory: EscherStm first, then EscherDelayStm.
 *
 * @throws IOException if the directory or its streams can't be read
 */
public void dumpEscher() throws IOException {
    DirectoryNode escher =
        (DirectoryNode)fs.getRoot().getEntry("Escher");

    dumpEscherStm(escher);
    dumpEscherDelayStm(escher);
}
/**
 * Decodes the escher records held in the given stream contents,
 *  one after another, printing each record as it is read.
 *
 * @param data the raw bytes of an escher stream
 */
private void dumpEscherStream(byte[] data) {
    DefaultEscherRecordFactory erf =
        new DefaultEscherRecordFactory();

    // Walk forward through the data. Each record has to be decoded
    //  from where the previous one finished, otherwise the first
    //  record just gets printed over and over
    int pos = 0;
    while(pos < data.length) {
        EscherRecord er = erf.createRecord(data, pos);
        er.fillFields(data, pos, erf);
        System.out.println(er.toString());

        int size = er.getRecordSize();
        if(size <= 0) {
            // No way to move on from here, so stop
            //  rather than looping forever
            break;
        }
        pos += size;
    }
}
/**
 * Prints the length of the EscherStm stream and then, if it
 *  isn't empty, the escher records within it.
 *
 * @param escherDir the directory holding the EscherStm entry
 * @throws IOException if the stream can't be read
 */
protected void dumpEscherStm(DirectoryNode escherDir) throws IOException {
    final byte[] stm = getData(escherDir, "EscherStm");

    System.out.println("");
    System.out.println("EscherStm - " + stm.length + " bytes long:");

    // Nothing to decode in an empty stream
    if(stm.length == 0) {
        return;
    }
    dumpEscherStream(stm);
}
/**
 * Prints the length of the EscherDelayStm stream and then, if it
 *  isn't empty, the escher records within it.
 *
 * @param escherDir the directory holding the EscherDelayStm entry
 * @throws IOException if the stream can't be read
 */
protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException {
    final byte[] delayStm = getData(escherDir, "EscherDelayStm");

    System.out.println("");
    System.out.println("EscherDelayStm - " + delayStm.length + " bytes long:");

    // Nothing to decode in an empty stream
    if(delayStm.length == 0) {
        return;
    }
    dumpEscherStream(delayStm);
}
/**
 * Prints the length of the "Envelope" stream from the root
 *  of the filesystem. The contents aren't decoded.
 *
 * @throws IOException if the stream can't be read
 */
public void dumpEnvelope() throws IOException {
    final int size = getData(fs.getRoot(), "Envelope").length;

    System.out.println("");
    System.out.println("Envelope - " + size + " bytes long:");
}
/**
 * Prints the length of the "Contents" stream from the root
 *  of the filesystem. The contents aren't decoded yet, the
 *  notes in the body record what has been seen in it so far.
 *
 * @throws IOException if the stream can't be read
 */
public void dumpContents() throws IOException {
    final int size = getData(fs.getRoot(), "Contents").length;

    System.out.println("");
    System.out.println("Contents - " + size + " bytes long:");

    // Layout notes from the files looked at so far:
    // 8 bytes, always seems to be
    // E8 AC 2C 00 E8 03 05 01
    // E8 AC 2C 00 E8 03 05 01
    // 4 bytes - size of contents
    // 13/15 00 00 01
    // ....
    // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... ..........
    // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... ..
    // 01 18 30 00 03 20 00 00
    // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
    // 00 00 00 88 1E 00 00 00
    // 01 18 31 00 03 20 00 00
    // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
    // 00 00 00 88 1E 00 00 00
    // 01 18 32 00 03 20 00 00
    // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
    // 00 00 00 88 1E 00 00 00
}
/**
 * Dumps the "CONTENTS" stream of the given directory in a fairly
 *  raw form: the first 0x20 bytes, then the region up to 0x200
 *  split into alternating short and long entries, then the text
 *  which starts at 0x200.
 *
 * @param dir the directory holding the CONTENTS entry
 * @throws IOException if the stream can't be read
 */
public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
    final byte[] raw = getData(dir, "CONTENTS");

    System.out.println("");
    System.out.println("CONTENTS - " + raw.length + " bytes long:");

    // What has been seen between the start and 0x200:
    //   CHNKINK(space) + 24 bytes
    //   0x1800
    //   TEXT + 6 bytes
    //   TEXT + 8 bytes
    //   0x1800
    //   STSH + 6 bytes
    //   STSH + 8 bytes
    //   0x1800
    //   STSH + 6 bytes
    //   STSH + 8 bytes
    //  though towards 0x200 the pattern may break down a
    //  little bit.
    // After the second of a given type, it seems to be
    //  4 bytes giving the start, then 4 bytes giving the
    //  length, then 18 00

    // First 8 bytes as text, the rest of the first 0x20 as hex
    String leader = new String(raw, 0, 8);
    System.out.println(leader + dumpBytes(raw, 8, 0x20-8));

    // Entries alternate between a short form (2 bytes on their own
    //  line, then a 4 char tag and 6 bytes) and a long form (a
    //  4 char tag and 8 bytes)
    boolean shortForm = true;
    int at = 0x20;
    while(at < 0x200) {
        int trailing;
        if(shortForm) {
            System.out.println(dumpBytes(raw, at, 2));
            at += 2;
            trailing = 6;
        } else {
            trailing = 8;
        }

        String tag = new String(raw, at, 4);
        System.out.println(tag + " " + dumpBytes(raw, at+4, trailing));

        at += 4 + trailing;
        shortForm = !shortForm;
    }

    // The text runs from 0x200 onwards, until we get
    //  to \r(00)\n(00)(00)(00). Look for the first run
    //  of three zero bytes to find where it stops
    int textEnd = -1;
    for(int i=0x200; i+2 < raw.length; i++) {
        if(raw[i] == 0 && raw[i+1] == 0 && raw[i+2] == 0) {
            textEnd = i;
            break;
        }
    }
    if(textEnd > 0) {
        // Two bytes per character
        int chars = (textEnd - 0x200) / 2;
        System.out.println("");
        System.out.println(
            StringUtil.getFromUnicodeLE(raw, 0x200, chars)
        );
    }

    // The font list comes slightly later
    // The hyperlinks may come before the fonts,
    //  or slightly in front
}
/**
 * Dumps the "CONTENTS" stream of the given directory using the
 *  current guess at its structure: 20 slots of 24 bytes each,
 *  starting at 0x20. A slot that starts with 18 00 is taken to
 *  hold:
 *   a 4 character type at +2,
 *   three unsigned shorts at +6, +8 and +10,
 *   a second 4 character type at +12,
 *   a 4 byte "from" offset at +16,
 *   a 4 byte length at +20.
 * The text is read using the from/length of the first slot.
 *
 * @param dir the directory holding the CONTENTS entry
 * @throws IOException if the stream can't be read
 */
public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
    final byte[] raw = getData(dir, "CONTENTS");

    System.out.println("");
    System.out.println("CONTENTS - " + raw.length + " bytes long:");

    final int slots = 20;
    String[] firstTag = new String[slots];
    String[] secondTag = new String[slots];
    int[] valA = new int[slots];
    int[] valB = new int[slots];
    int[] valC = new int[slots];
    int[] starts = new int[slots];
    int[] lengths = new int[slots];

    // Decode every slot that carries the 18 00 marker. Slots
    //  without it are left as null / zero
    for(int slot=0; slot<slots; slot++) {
        int base = 0x20 + slot*24;
        if(raw[base] != 0x18 || raw[base+1] != 0x00) {
            // Doesn't have data
            continue;
        }

        firstTag[slot] = new String(raw, base+2, 4);
        valA[slot] = LittleEndian.getUShort(raw, base+6);
        valB[slot] = LittleEndian.getUShort(raw, base+8);
        valC[slot] = LittleEndian.getUShort(raw, base+10);
        secondTag[slot] = new String(raw, base+12, 4);
        starts[slot] = (int)LittleEndian.getUInt(raw, base+16);
        lengths[slot] = (int)LittleEndian.getUInt(raw, base+20);
    }

    // The text is fetched up front, from where the first slot
    //  points, at two bytes per character
    String text = StringUtil.getFromUnicodeLE(
        raw, starts[0], lengths[0]/2
    );

    // Summary of each slot, numbered with two digits
    for(int slot=0; slot<slots; slot++) {
        String label = (slot < 10) ? ("0" + slot) : Integer.toString(slot);
        System.out.print(label + " ");

        if(firstTag[slot] == null) {
            System.out.println("(not present)");
            continue;
        }

        System.out.println(
            "\t" + firstTag[slot] + " " +
            valA[slot] + " " + valB[slot] + " " + valC[slot]
        );
        System.out.println(
            "\t" + secondTag[slot] + " " +
            "from: " + Integer.toHexString(starts[slot]) +
            " (" + starts[slot] + ")" +
            ", len: " + Integer.toHexString(lengths[slot]) +
            " (" + lengths[slot] + ")"
        );
    }

    // Text
    System.out.println("");
    System.out.println("TEXT:");
    System.out.println(text);
    System.out.println("");

    // For every slot that's present, show the first
    //  12 bytes of what it points to
    for(int slot=0; slot<slots; slot++) {
        if(firstTag[slot] != null) {
            int begin = starts[slot];
            System.out.println(
                firstTag[slot] + " -> " + secondTag[slot] +
                " @ " + Integer.toHexString(begin) +
                " (" + begin + ")"
            );
            for(int part=0; part<3; part++) {
                System.out.println("\t" + dumpBytes(raw, begin + part*4, 4));
            }
            System.out.println("\t(etc)");
        }
    }
}
/**
 * Placeholder for dumping the CompObj part of the given
 *  directory. Not implemented yet, so currently does nothing.
 *
 * @param dir the directory to dump from (currently unused)
 */
protected void dump001CompObj(DirectoryNode dir) {
// TODO
}
/**
 * Dumps the Quill part of the file, which is found in the
 *  "QuillSub" directory inside the "Quill" directory. The
 *  CONTENTS stream is dumped twice, raw and then guessed.
 *
 * @throws IOException if the directories or streams can't be read
 */
public void dumpQuill() throws IOException {
    DirectoryNode quill =
        (DirectoryNode)fs.getRoot().getEntry("Quill");
    DirectoryNode quillSub =
        (DirectoryNode)quill.getEntry("QuillSub");

    dump001CompObj(quillSub);
    dumpCONTENTSraw(quillSub);
    dumpCONTENTSguessed(quillSub);
}
}

二进制
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.pub 查看文件


+ 29
- 0
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.txt 查看文件

@@ -0,0 +1,29 @@
This is some text on the first page
It’s in times new roman, font size 10, all normal


This is in bold and italic
It’s Arial, 20 point font
It’s in the second textbox on the first page


This is the second page

It is also times new roman, 10 point


Table on page 2 Top right
P2 table left P2 table right
Bottom Left Bottom Right


This text is on page two
This is a link to Apache POI
More normal text
Link to a file


More text, more hyperlinks
email link
Final hyperlink
Within doc to page 1

二进制
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub 查看文件


+ 34
- 0
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt 查看文件

@@ -0,0 +1,34 @@
This is some text on the first page
It’s in times new roman, font size 10, all normal

We’ve added some more text in here, to push all the offsets about a bit.



This is in bold and italic
It’s Arial, 20 point font
It’s in the second textbox on the first page

Ditto with more text in here.


This is the second page

It is also times new roman, 10 point


Table on page 2 Top right
P2 table left P2 table right
Bottom Left Bottom Right


This text is on page two
This is a link to Apache POI
More normal text
Link to a file


More text, more hyperlinks
email link
Final hyperlink
Within doc to page 1

二进制
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2000.pub 查看文件


二进制
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub 查看文件


+ 29
- 0
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt 查看文件

@@ -0,0 +1,29 @@
This is some text on the first page
It’s in times new roman, font size 10, all normal


This is in bold and italic
It’s Arial, 20 point font
It’s in the second textbox on the first page


This is the second page12345678

It is also times new roman, 10 point


Table on page 2 Top right
P2 table left P2 table right
Bottom Left Bottom Right


This text is on page two
This is a link to Apache POI
More normal text
Link to a file


More text, more hyperlinks
email link
Final hyperlink
Within doc to page 1

二进制
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub 查看文件


+ 29
- 0
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt 查看文件

@@ -0,0 +1,29 @@
This is some text on the first page
It’s in times new roman, font size 10, all normal


This is in bold and italic
It’s Arial, 20 point font
It’s in the second textbox on the first page


This is the second page

It is also times new roman, 10 point


Table on page 2 Top right
P2 table left P2 table right
Bottom Left Bottom Right


This text is on page two
This is a link to Apache POI
More normal text
Link to a file


More text, more hyperlinks
email link
Final hyperlink
Within doc to page 1

二进制
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample98.pub 查看文件


二进制
src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleBrochure.pub 查看文件


二进制
src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleNewsletter.pub 查看文件


+ 29
- 2
src/testcases/org/apache/poi/hssf/model/TestSheet.java 查看文件

@@ -32,6 +32,7 @@ import org.apache.poi.hssf.record.CellValueRecordInterface;
import org.apache.poi.hssf.record.ColumnInfoRecord;
import org.apache.poi.hssf.record.DimensionsRecord;
import org.apache.poi.hssf.record.EOFRecord;
import org.apache.poi.hssf.record.GutsRecord;
import org.apache.poi.hssf.record.IndexRecord;
import org.apache.poi.hssf.record.MergeCellsRecord;
import org.apache.poi.hssf.record.Record;
@@ -41,6 +42,8 @@ import org.apache.poi.hssf.record.UncalcedRecord;
import org.apache.poi.hssf.record.aggregates.ColumnInfoRecordsAggregate;
import org.apache.poi.hssf.record.aggregates.PageSettingsBlock;
import org.apache.poi.hssf.record.aggregates.RowRecordsAggregate;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.CellRangeAddress;

/**
@@ -438,8 +441,8 @@ public final class TestSheet extends TestCase {
if (false) {
// make sure that RRA and VRA are in the right place
// (Aug 2008) since the VRA is now part of the RRA, there is much less chance that
// they could get out of order. Still, one could write serialize the sheet here,
// and read back with EventRecordFactory to make sure...
// they could get out of order. Still, one could write serialize the sheet here,
// and read back with EventRecordFactory to make sure...
}
assertEquals(242, dbCellRecordPos);
}
@@ -475,5 +478,29 @@ public final class TestSheet extends TestCase {
return _indexRecord;
}
}
/**
 * Checks for the bug introduced around r682282-r683880 which caused a
 * second GUTS record to be created, which in turn got the dimensions
 * record out of alignment.
 */
public void testGutsRecord_bug45640() {
    Sheet sheet = Sheet.createSheet();
    sheet.addRow(new RowRecord(0));
    sheet.addRow(new RowRecord(1));
    sheet.groupRowRange( 0, 1, true );
    sheet.toString();

    // Count how many GUTS records the sheet now holds
    List records = sheet.getRecords();
    int gutsCount = 0;
    int idx = 0;
    while (idx < records.size()) {
        Object rec = records.get(idx);
        if (rec instanceof GutsRecord) {
            gutsCount++;
        }
        idx++;
    }

    // Exactly two is the signature of the bug, so report it by name
    if (gutsCount == 2) {
        throw new AssertionFailedError("Identified bug 45640");
    }
    assertEquals(1, gutsCount);
}
}


正在加载...
取消
保存