* document, but excludes any headers and footers.
*/
public Range getRange() {
+ // First, trigger a full recalculation; this is
+ // needed in case of deletes etc.
+ getOverallRange();
+
+ // Now, return the real one
return new Range(
_cpSplit.getMainDocumentStart(),
_cpSplit.getMainDocumentEnd(),
{
super(documentStream, offset);
- for (int x = 0; x < _crun; x++)
- {
- boolean isUnicode = tpt.isUnicodeAt( getStart(x) );
- _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
+ for (int x = 0; x < _crun; x++) {
+ int startAt = getStart(x) - fcMin;
+ int endAt = getEnd(x) - fcMin;
+ boolean isUnicode = tpt.isUnicodeAt(startAt);
+
+ _papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
}
_fkp = null;
_dataStream = dataStream;
{
int end = start + length;
- if (_cpEnd > start)
- {
- if (_cpStart < end)
- {
- _cpEnd = end >= _cpEnd ? start : _cpEnd - length;
- _cpStart = Math.min(start, _cpStart);
- }
- else
- {
- _cpEnd -= length;
- _cpStart -= length;
- }
+ if (_cpEnd > start) {
+ // The start of the change is before we end
+
+ if (_cpStart < end) {
+ // The deleted region overlaps this node
+ _cpEnd = end >= _cpEnd ? start : _cpEnd - length;
+ _cpStart = Math.min(start, _cpStart);
+ } else {
+ // The deleted region lies entirely before this node, so shift both ends back
+ _cpEnd -= length;
+ _cpStart -= length;
+ }
}
}
package org.apache.poi.hwpf.model;
-import org.apache.poi.hwpf.sprm.SprmBuffer;
-import org.apache.poi.hwpf.sprm.SectionSprmUncompressor;
import org.apache.poi.hwpf.sprm.SectionSprmCompressor;
+import org.apache.poi.hwpf.sprm.SectionSprmUncompressor;
import org.apache.poi.hwpf.usermodel.SectionProperties;
+/**
+ * TODO - figure out whether this works in characters, as most
+ * things do, or in bytes, as PAPX / CHPX do.
+ */
public class SEPX extends PropertyNode
{
if(end > buf.length()) {
throw new StringIndexOutOfBoundsException("Index " + end + " out of range 0 -> " + buf.length());
}
+ if(end < start) {
+ throw new StringIndexOutOfBoundsException("Asked for text from " + start + " to " + end + ", which has an end before the start!");
+ }
return buf.substring(start, end);
}
absPlaceHolderIndex,
(absPlaceHolderIndex + pPlaceHolder.length()), getDocument()
);
- if (subRange.usesUnicode()) {
- absPlaceHolderIndex = getStartOffset() + (pOffset * 2);
- subRange = new Range(
- absPlaceHolderIndex,
- (absPlaceHolderIndex + (pPlaceHolder.length() * 2)),
- getDocument()
- );
- }
// this Range isn't a proper parent of the subRange() so we'll have to keep
// track of an updated endOffset on our own
(absPlaceHolderIndex + pPlaceHolder.length() + pValue.length()),
getDocument()
);
- if (subRange.usesUnicode())
- subRange = new Range(
- (absPlaceHolderIndex + (pValue.length() * 2)),
- (absPlaceHolderIndex + (pPlaceHolder.length() * 2) +
- (pValue.length() * 2)), getDocument()
- );
// deletes are automagically propagated
subRange.delete();
/**
* Test to see if Range.delete() works even if the Range contains a
* CharacterRun that uses Unicode characters.
- *
- * TODO - re-enable me when unicode paragraph stuff is fixed!
*/
public class TestRangeDelete extends TestCase {
// u201c and u201d are "smart-quotes"
+ private String introText =
+ "Introduction\r";
+ private String fillerText =
+ "${delete} This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r";
private String originalText =
"It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} ${delete} and all the POI contributors for their assistance in this matter.\r";
private String searchText = "${delete}";
public void testDocStructure() throws Exception {
HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile));
+ Range range;
+ Section section;
+ Paragraph para;
- Range range = daDoc.getOverallRange();
-
+ // First, check overall
+ range = daDoc.getOverallRange();
assertEquals(1, range.numSections());
- Section section = range.getSection(0);
-
- assertEquals(5, section.numParagraphs());
- Paragraph para = section.getParagraph(2);
-
- assertEquals(5, para.numCharacterRuns());
-
- assertEquals(originalText, para.text());
+ assertEquals(4, range.numParagraphs());
- // Now check on just the main text
+ // Now, onto just the doc bit
range = daDoc.getRange();
-
+
assertEquals(1, range.numSections());
section = range.getSection(0);
- assertEquals(5, section.numParagraphs());
+ assertEquals(4, section.numParagraphs());
+
+ para = section.getParagraph(0);
+ assertEquals(1, para.numCharacterRuns());
+ assertEquals(introText, para.text());
+
+ para = section.getParagraph(1);
+ assertEquals(2, para.numCharacterRuns());
+ assertEquals(fillerText, para.text());
+
para = section.getParagraph(2);
-
- assertEquals(5, para.numCharacterRuns());
-
+ assertEquals(6, para.numCharacterRuns());
assertEquals(originalText, para.text());
}
assertEquals(1, range.numSections());
Section section = range.getSection(0);
- assertEquals(5, section.numParagraphs());
+ assertEquals(4, section.numParagraphs());
Paragraph para = section.getParagraph(2);
assertEquals(192, offset);
int absOffset = para.getStartOffset() + offset;
- if (para.usesUnicode())
- absOffset = para.getStartOffset() + (offset * 2);
-
Range subRange = new Range(absOffset, (absOffset + searchText.length()), para.getDocument());
- if (subRange.usesUnicode())
- subRange = new Range(absOffset, (absOffset + (searchText.length() * 2)), para.getDocument());
assertEquals(searchText, subRange.text());
assertEquals(1, range.numSections());
section = range.getSection(0);
- assertEquals(5, section.numParagraphs());
+ assertEquals(4, section.numParagraphs());
para = section.getParagraph(2);
text = para.text();
assertEquals(1, range.numSections());
Section section = range.getSection(0);
- assertEquals(5, section.numParagraphs());
+ assertEquals(4, section.numParagraphs());
Paragraph para = section.getParagraph(2);
boolean keepLooking = true;
while (keepLooking) {
-
+ // Reload the range every time
+ range = daDoc.getRange();
int offset = range.text().indexOf(searchText);
if (offset >= 0) {
int absOffset = range.getStartOffset() + offset;
- if (range.usesUnicode())
- absOffset = range.getStartOffset() + (offset * 2);
Range subRange = new Range(
absOffset, (absOffset + searchText.length()), range.getDocument());
- if (subRange.usesUnicode())
- subRange = new Range(
- absOffset, (absOffset + (searchText.length() * 2)), range.getDocument());
assertEquals(searchText, subRange.text());
subRange.delete();
- } else
+ } else {
keepLooking = false;
+ }
}
// we need to let the model re-calculate the Range before we use it
assertEquals(1, range.numSections());
section = range.getSection(0);
- assertEquals(5, section.numParagraphs());
+ assertEquals(4, section.numParagraphs());
+
+ para = section.getParagraph(0);
+ text = para.text();
+ assertEquals(introText, text);
para = section.getParagraph(1);
text = para.text();
private String originalText =
"It is used to confirm that text insertion works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present.\r";
private String textToInsert = "Look at me! I'm cool! ";
- private int insertionPoint = 244;
+ private int insertionPoint = 122;
private String illustrativeDocFile;
assertEquals(3, section.numParagraphs());
Paragraph para = section.getParagraph(2);
-
- assertEquals(3, para.numCharacterRuns());
- String text = para.getCharacterRun(0).text() + para.getCharacterRun(1).text() +
- para.getCharacterRun(2).text();
+ assertEquals(originalText, para.text());
+
+ assertEquals(6, para.numCharacterRuns());
+ String text =
+ para.getCharacterRun(0).text() +
+ para.getCharacterRun(1).text() +
+ para.getCharacterRun(2).text() +
+ para.getCharacterRun(3).text() +
+ para.getCharacterRun(4).text() +
+ para.getCharacterRun(5).text()
+ ;
assertEquals(originalText, text);
+
+ assertEquals(insertionPoint, para.getStartOffset());
}
/**
assertEquals(3, section.numParagraphs());
Paragraph para = section.getParagraph(2);
-
- assertEquals(3, para.numCharacterRuns());
- String text = para.getCharacterRun(0).text() + para.getCharacterRun(1).text() +
- para.getCharacterRun(2).text();
+ assertEquals((textToInsert + originalText), para.text());
+
+ assertEquals(6, para.numCharacterRuns());
+ String text =
+ para.getCharacterRun(0).text() +
+ para.getCharacterRun(1).text() +
+ para.getCharacterRun(2).text() +
+ para.getCharacterRun(3).text() +
+ para.getCharacterRun(4).text() +
+ para.getCharacterRun(5).text()
+ ;
// System.out.println(text);
assertEquals(p2_parts[0] + "\r", r.getParagraph(11).text());
}
public void testUnicodeStyling() throws Exception {
- // TODO
+ Range r = u.getRange();
+ String[] p1_parts = u_page_1.split("\r");
+
+ Paragraph p1 = r.getParagraph(0);
+ Paragraph p7 = r.getParagraph(6);
+
+ // Line ending in its own run each time!
+ assertEquals(2, p1.numCharacterRuns());
+ assertEquals(2, p7.numCharacterRuns());
+
+ CharacterRun c1a = p1.getCharacterRun(0);
+ CharacterRun c1b = p1.getCharacterRun(1);
+ CharacterRun c7a = p7.getCharacterRun(0);
+ CharacterRun c7b = p7.getCharacterRun(1);
+
+ assertEquals("Times New Roman", c1a.getFontName()); // No Calibri
+ assertEquals(22, c1a.getFontSize());
+
+ assertEquals("Times New Roman", c1b.getFontName()); // No Calibri
+ assertEquals(22, c1b.getFontSize());
+
+ assertEquals("Times New Roman", c7a.getFontName());
+ assertEquals(48, c7a.getFontSize());
+
+ assertEquals("Times New Roman", c7b.getFontName());
+ assertEquals(48, c7b.getFontSize());
+
+ // Now check where they crop up
+ assertEquals(
+ 0,
+ c1a.getStartOffset()
+ );
+ assertEquals(
+ p1_parts[0].length(),
+ c1a.getEndOffset()
+ );
+
+ assertEquals(
+ p1_parts[0].length(),
+ c1b.getStartOffset()
+ );
+ assertEquals(
+ p1_parts[0].length()+1,
+ c1b.getEndOffset()
+ );
+
+ assertEquals(
+ p1_parts[0].length() + 1 +
+ p1_parts[1].length() + 1 +
+ p1_parts[2].length() + 1 +
+ p1_parts[3].length() + 1 +
+ p1_parts[4].length() + 1 +
+ p1_parts[5].length() + 1,
+ c7a.getStartOffset()
+ );
+ assertEquals(
+ p1_parts[0].length() + 1 +
+ p1_parts[1].length() + 1 +
+ p1_parts[2].length() + 1 +
+ p1_parts[3].length() + 1 +
+ p1_parts[4].length() + 1 +
+ p1_parts[5].length() + 1 +
+ 1,
+ c7a.getEndOffset()
+ );
+
+ assertEquals(
+ p1_parts[0].length() + 1 +
+ p1_parts[1].length() + 1 +
+ p1_parts[2].length() + 1 +
+ p1_parts[3].length() + 1 +
+ p1_parts[4].length() + 1 +
+ p1_parts[5].length() + 1 +
+ 1,
+ c7b.getStartOffset()
+ );
+ assertEquals(
+ p1_parts[0].length() + 1 +
+ p1_parts[1].length() + 1 +
+ p1_parts[2].length() + 1 +
+ p1_parts[3].length() + 1 +
+ p1_parts[4].length() + 1 +
+ p1_parts[5].length() + 1 +
+ p1_parts[6].length() + 1,
+ c7b.getEndOffset()
+ );
}
}
assertEquals(1, range.numSections());
Section section = range.getSection(0);
- assertEquals(5, section.numParagraphs());
+ assertEquals(4, section.numParagraphs());
Paragraph para = section.getParagraph(2);
- assertEquals(5, para.numCharacterRuns());
- String text = para.getCharacterRun(0).text() + para.getCharacterRun(1).text() +
- para.getCharacterRun(2).text() + para.getCharacterRun(3).text() + para.getCharacterRun(4).text();
+ assertEquals(6, para.numCharacterRuns());
+ String text =
+ para.getCharacterRun(0).text() +
+ para.getCharacterRun(1).text() +
+ para.getCharacterRun(2).text() +
+ para.getCharacterRun(3).text() +
+ para.getCharacterRun(4).text() +
+ para.getCharacterRun(5).text()
+ ;
assertEquals(originalText, text);
}
assertEquals(1, range.numSections());
Section section = range.getSection(0);
- assertEquals(5, section.numParagraphs());
+ assertEquals(4, section.numParagraphs());
Paragraph para = section.getParagraph(2);
assertEquals(1, range.numSections());
Section section = range.getSection(0);
- assertEquals(5, section.numParagraphs());
+ assertEquals(4, section.numParagraphs());
Paragraph para = section.getParagraph(2);
assertEquals(1, range.numSections());
section = range.getSection(0);
- assertEquals(5, section.numParagraphs());
+ assertEquals(4, section.numParagraphs());
para = section.getParagraph(2);
text = para.text();