|
|
@@ -39,18 +39,18 @@ public class TestWordToHtmlConverter extends POITestCase |
|
|
|
private static String getHtmlText( final String sampleFileName ) |
|
|
|
throws Exception |
|
|
|
{ |
|
|
|
return getHtmlText( sampleFileName, false ); |
|
|
|
return getHtmlText( sampleFileName, false); |
|
|
|
} |
|
|
|
|
|
|
|
private static String getHtmlText( final String sampleFileName, |
|
|
|
boolean emulatePictureStorage ) throws Exception |
|
|
|
{ |
|
|
|
HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples |
|
|
|
.getDocumentInstance().openResourceAsStream( sampleFileName ) ); |
|
|
|
.getDocumentInstance().openResourceAsStream( sampleFileName )); |
|
|
|
|
|
|
|
Document newDocument = XMLHelper.getDocumentBuilderFactory().newDocumentBuilder().newDocument(); |
|
|
|
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( |
|
|
|
newDocument ); |
|
|
|
newDocument); |
|
|
|
|
|
|
|
if ( emulatePictureStorage ) |
|
|
|
{ |
|
|
@@ -62,21 +62,21 @@ public class TestWordToHtmlConverter extends POITestCase |
|
|
|
{ |
|
|
|
return suggestedName; |
|
|
|
} |
|
|
|
} ); |
|
|
|
}); |
|
|
|
} |
|
|
|
|
|
|
|
wordToHtmlConverter.processDocument( hwpfDocument ); |
|
|
|
wordToHtmlConverter.processDocument( hwpfDocument); |
|
|
|
|
|
|
|
StringWriter stringWriter = new StringWriter(); |
|
|
|
|
|
|
|
Transformer transformer = TransformerFactory.newInstance() |
|
|
|
.newTransformer(); |
|
|
|
transformer.setOutputProperty( OutputKeys.INDENT, "yes" ); |
|
|
|
transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" ); |
|
|
|
transformer.setOutputProperty( OutputKeys.METHOD, "html" ); |
|
|
|
transformer.setOutputProperty( OutputKeys.INDENT, "yes"); |
|
|
|
transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8"); |
|
|
|
transformer.setOutputProperty( OutputKeys.METHOD, "html"); |
|
|
|
transformer.transform( |
|
|
|
new DOMSource( wordToHtmlConverter.getDocument() ), |
|
|
|
new StreamResult( stringWriter ) ); |
|
|
|
new StreamResult( stringWriter )); |
|
|
|
|
|
|
|
String result = stringWriter.toString(); |
|
|
|
return result; |
|
|
@@ -84,172 +84,181 @@ public class TestWordToHtmlConverter extends POITestCase |
|
|
|
|
|
|
|
public void testAIOOBTap() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "AIOOB-Tap.doc" ); |
|
|
|
assertContains( result.substring( 0, 6000 ), "<table class=\"t1\">" ); |
|
|
|
String result = getHtmlText( "AIOOB-Tap.doc"); |
|
|
|
assertContains(result.substring( 0, 6000 ), "<table class=\"t1\">"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testBug33519() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "Bug33519.doc" ); |
|
|
|
String result = getHtmlText( "Bug33519.doc"); |
|
|
|
assertContains( |
|
|
|
result, |
|
|
|
"\u041F\u043B\u0430\u043D\u0438\u043D\u0441\u043A\u0438 \u0442\u0443\u0440\u043E\u0432\u0435" ); |
|
|
|
assertContains( result, |
|
|
|
"\u042F\u0432\u043E\u0440 \u0410\u0441\u0435\u043D\u043E\u0432" ); |
|
|
|
"\u041F\u043B\u0430\u043D\u0438\u043D\u0441\u043A\u0438 \u0442\u0443\u0440\u043E\u0432\u0435"); |
|
|
|
assertContains(result, |
|
|
|
"\u042F\u0432\u043E\u0440 \u0410\u0441\u0435\u043D\u043E\u0432"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testBug46610_2() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "Bug46610_2.doc" ); |
|
|
|
String result = getHtmlText( "Bug46610_2.doc"); |
|
|
|
assertContains( |
|
|
|
result, |
|
|
|
"012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ); |
|
|
|
"012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testBug46817() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "Bug46817.doc" ); |
|
|
|
String result = getHtmlText( "Bug46817.doc"); |
|
|
|
final String substring = "<table class=\"t1\">"; |
|
|
|
assertContains( result, substring ); |
|
|
|
assertContains(result, substring); |
|
|
|
} |
|
|
|
|
|
|
|
public void testBug47286() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "Bug47286.doc" ); |
|
|
|
String result = getHtmlText( "Bug47286.doc"); |
|
|
|
|
|
|
|
assertFalse( result.contains( "FORMTEXT" ) ); |
|
|
|
assertFalse(result.contains( "FORMTEXT" )); |
|
|
|
|
|
|
|
assertContains( result, "color:#4f6228;" ); |
|
|
|
assertContains( result, "Passport No and the date of expire" ); |
|
|
|
assertContains( result, "mfa.gov.cy" ); |
|
|
|
assertContains(result, "color:#4f6228;"); |
|
|
|
assertContains(result, "Passport No and the date of expire"); |
|
|
|
assertContains(result, "mfa.gov.cy"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testBug48075() throws Exception |
|
|
|
{ |
|
|
|
getHtmlText( "Bug48075.doc" ); |
|
|
|
getHtmlText( "Bug48075.doc"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testBug52583() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "Bug52583.doc" ); |
|
|
|
String result = getHtmlText( "Bug52583.doc"); |
|
|
|
assertContains( |
|
|
|
result, |
|
|
|
"<select><option selected>riri</option><option>fifi</option><option>loulou</option></select>" ); |
|
|
|
"<select><option selected>riri</option><option>fifi</option><option>loulou</option></select>"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testBug53182() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "Bug53182.doc" ); |
|
|
|
assertFalse( result.contains( "italic" ) ); |
|
|
|
String result = getHtmlText( "Bug53182.doc"); |
|
|
|
assertFalse(result.contains( "italic" )); |
|
|
|
} |
|
|
|
|
|
|
|
public void testDocumentProperties() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "documentProperties.doc" ); |
|
|
|
String result = getHtmlText( "documentProperties.doc"); |
|
|
|
|
|
|
|
assertContains( result, "<title>This is document title</title>" ); |
|
|
|
assertContains( result, |
|
|
|
"<meta content=\"This is document keywords\" name=\"keywords\">" ); |
|
|
|
assertContains(result, "<title>This is document title</title>"); |
|
|
|
assertContains(result, |
|
|
|
"<meta content=\"This is document keywords\" name=\"keywords\">"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testEmailhyperlink() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "Bug47286.doc" ); |
|
|
|
String result = getHtmlText( "Bug47286.doc"); |
|
|
|
final String substring = "provisastpet@mfa.gov.cy"; |
|
|
|
assertContains( result, substring ); |
|
|
|
assertContains(result, substring); |
|
|
|
} |
|
|
|
|
|
|
|
public void testEndnote() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "endingnote.doc" ); |
|
|
|
String result = getHtmlText( "endingnote.doc"); |
|
|
|
|
|
|
|
assertContains( |
|
|
|
result, |
|
|
|
"<a class=\"a1 endnoteanchor\" href=\"#endnote_1\" name=\"endnote_back_1\">1</a>" ); |
|
|
|
"<a class=\"a1 endnoteanchor\" href=\"#endnote_1\" name=\"endnote_back_1\">1</a>"); |
|
|
|
assertContains( |
|
|
|
result, |
|
|
|
"<a class=\"a1 endnoteindex\" href=\"#endnote_back_1\" name=\"endnote_1\">1</a> <span" ); |
|
|
|
assertContains( result, "Ending note text" ); |
|
|
|
"<a class=\"a1 endnoteindex\" href=\"#endnote_back_1\" name=\"endnote_1\">1</a> <span"); |
|
|
|
assertContains(result, "Ending note text"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testEquation() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "equation.doc" ); |
|
|
|
String result = getHtmlText( "equation.doc"); |
|
|
|
|
|
|
|
assertContains( result, "<!--Image link to '0.emf' can be here-->" ); |
|
|
|
assertContains(result, "<!--Image link to '0.emf' can be here-->"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testHyperlink() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "hyperlink.doc" ); |
|
|
|
String result = getHtmlText( "hyperlink.doc"); |
|
|
|
|
|
|
|
assertContains( result, "<span>Before text; </span><a " ); |
|
|
|
assertContains( result, |
|
|
|
"<a href=\"http://testuri.org/\"><span class=\"s1\">Hyperlink text</span></a>" ); |
|
|
|
assertContains( result, "</a><span>; after text</span>" ); |
|
|
|
assertContains(result, "<span>Before text; </span><a "); |
|
|
|
assertContains(result, |
|
|
|
"<a href=\"http://testuri.org/\"><span class=\"s1\">Hyperlink text</span></a>"); |
|
|
|
assertContains(result, "</a><span>; after text</span>"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testInnerTable() throws Exception |
|
|
|
{ |
|
|
|
getHtmlText( "innertable.doc" ); |
|
|
|
getHtmlText( "innertable.doc"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testListsMargins() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "lists-margins.doc" ); |
|
|
|
|
|
|
|
assertContains( result, |
|
|
|
".s1{display: inline-block; text-indent: 0; min-width: 0.4861111in;}" ); |
|
|
|
assertContains( result, |
|
|
|
".s2{display: inline-block; text-indent: 0; min-width: 0.23055555in;}" ); |
|
|
|
assertContains( result, |
|
|
|
".s3{display: inline-block; text-indent: 0; min-width: 0.28541666in;}" ); |
|
|
|
assertContains( result, |
|
|
|
".s4{display: inline-block; text-indent: 0; min-width: 0.28333333in;}" ); |
|
|
|
assertContains( result, |
|
|
|
".p4{text-indent:-0.59652776in;margin-left:-0.70069444in;" ); |
|
|
|
String result = getHtmlText( "lists-margins.doc"); |
|
|
|
|
|
|
|
assertContains(result, |
|
|
|
".s1{display: inline-block; text-indent: 0; min-width: 0.4861111in;}"); |
|
|
|
assertContains(result, |
|
|
|
".s2{display: inline-block; text-indent: 0; min-width: 0.23055555in;}"); |
|
|
|
assertContains(result, |
|
|
|
".s3{display: inline-block; text-indent: 0; min-width: 0.28541666in;}"); |
|
|
|
assertContains(result, |
|
|
|
".s4{display: inline-block; text-indent: 0; min-width: 0.28333333in;}"); |
|
|
|
assertContains(result, |
|
|
|
".p4{text-indent:-0.59652776in;margin-left:-0.70069444in;"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testO_kurs_doc() throws Exception |
|
|
|
{ |
|
|
|
getHtmlText( "o_kurs.doc" ); |
|
|
|
getHtmlText( "o_kurs.doc"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testPageref() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "pageref.doc" ); |
|
|
|
String result = getHtmlText( "pageref.doc"); |
|
|
|
|
|
|
|
assertContains( result, "<a href=\"#userref\">" ); |
|
|
|
assertContains( result, "<a name=\"userref\">" ); |
|
|
|
assertContains( result, "1" ); |
|
|
|
assertContains(result, "<a href=\"#userref\">"); |
|
|
|
assertContains(result, "<a name=\"userref\">"); |
|
|
|
assertContains(result, "1"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testPicture() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "picture.doc", true ); |
|
|
|
String result = getHtmlText( "picture.doc", true); |
|
|
|
|
|
|
|
// picture |
|
|
|
assertContains( result, "src=\"0.emf\"" ); |
|
|
|
assertContains(result, "src=\"0.emf\""); |
|
|
|
// visible size |
|
|
|
assertContains( result, "width:3.1305554in;height:1.7250001in;" ); |
|
|
|
assertContains(result, "width:3.1305554in;height:1.7250001in;"); |
|
|
|
// shift due to crop |
|
|
|
assertContains( result, "left:-0.09375;top:-0.25694445;" ); |
|
|
|
assertContains(result, "left:-0.09375;top:-0.25694445;"); |
|
|
|
// size without crop |
|
|
|
assertContains( result, "width:3.4125in;height:2.325in;" ); |
|
|
|
assertContains(result, "width:3.4125in;height:2.325in;"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testPicturesEscher() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "pictures_escher.doc", true ); |
|
|
|
assertContains( result, "<img src=\"s0.PNG\">" ); |
|
|
|
assertContains( result, "<img src=\"s808.PNG\">" ); |
|
|
|
String result = getHtmlText( "pictures_escher.doc", true); |
|
|
|
assertContains(result, "<img src=\"s0.PNG\">"); |
|
|
|
assertContains(result, "<img src=\"s808.PNG\">"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testTableMerges() throws Exception |
|
|
|
{ |
|
|
|
String result = getHtmlText( "table-merges.doc" ); |
|
|
|
String result = getHtmlText( "table-merges.doc"); |
|
|
|
|
|
|
|
assertContains(result, "<td class=\"td1\" colspan=\"3\">"); |
|
|
|
assertContains(result, "<td class=\"td2\" colspan=\"2\">"); |
|
|
|
} |
|
|
|
|
|
|
|
public void testBug52420() throws Exception { |
|
|
|
String result = getHtmlText( "52420.doc"); |
|
|
|
|
|
|
|
assertFalse(result.contains( "FORMTEXT" )); |
|
|
|
|
|
|
|
assertContains( result, "<td class=\"td1\" colspan=\"3\">" ); |
|
|
|
assertContains( result, "<td class=\"td2\" colspan=\"2\">" ); |
|
|
|
assertContains(result, "\u0417\u0410\u0414\u0410\u041d\u0418\u0415"); |
|
|
|
assertContains(result, "\u041f\u0440\u0435\u043f\u043e\u0434\u0430\u0432\u0430\u0442\u0435\u043b\u044c"); |
|
|
|
} |
|
|
|
} |