You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PDFEncodingTestCase.java 5.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.render.pdf;
  19. import java.io.File;
  20. import java.io.IOException;
  21. import org.junit.Test;
  22. import org.xml.sax.SAXException;
  23. import static org.junit.Assert.assertEquals;
  24. import static org.junit.Assert.assertTrue;
  25. import org.apache.pdfbox.pdmodel.PDDocument;
  26. import org.apache.pdfbox.text.PDFTextStripper;
  27. import org.apache.fop.apps.FOUserAgent;
  28. /** Test that characters are correctly encoded in a generated PDF file */
  29. public class PDFEncodingTestCase extends BasePDFTest {
  30. private File foBaseDir = new File("test/xml/pdf-encoding");
  31. private final boolean dumpPDF = Boolean.getBoolean("PDFEncodingTestCase.dumpPDF");
  32. static final String INPUT_FILE = "test/xml/pdf-encoding/pdf-encoding-test.xconf";
  33. static final String TEST_MARKER = "PDFE_TEST_MARK_";
  34. public PDFEncodingTestCase() throws SAXException, IOException {
  35. super(INPUT_FILE);
  36. }
  37. /**
  38. * create an FOUserAgent for our tests
  39. * @return an initialized FOUserAgent
  40. */
  41. protected FOUserAgent getUserAgent() {
  42. final FOUserAgent a = fopFactory.newFOUserAgent();
  43. return a;
  44. }
  45. /**
  46. * Test using a standard FOP font
  47. * @throws Exception checkstyle wants a comment here, even a silly one
  48. */
  49. @Test
  50. public void testPDFEncodingWithStandardFont() throws Exception {
  51. /* If the PDF encoding is correct, a text dump of the generated PDF file contains this (excerpts)
  52. * ...Tm [(PDFE_TEST_MARK_2:) ( ) (This) ( ) (is) ...(acute:) ( ) (XX_\351_XX) ] TJ
  53. * ...Tm [(PDFE_TEST_MARK_3:) ( ) (This) ( ) (is) ...(letter:) ( ) (XX_\342\352\356\364\373_XX) ] TJ
  54. * The following array is used to look for these patterns
  55. */
  56. final String[] testPatterns = {
  57. TEST_MARKER + "1", "Standard",
  58. TEST_MARKER + "2", "XX_é_XX",
  59. TEST_MARKER + "3", "XX_âêîôû_XX"
  60. };
  61. runTest("test-standard-font.fo", testPatterns);
  62. }
  63. /**
  64. * Test encoding with a Custom Font using BMP characters.
  65. *
  66. * NB: The Gladiator font do not contain '_' Glyph
  67. *
  68. * @throws Exception
  69. * checkstyle wants a comment here, even a silly one
  70. */
  71. @Test
  72. public void testPDFEncodingWithCustomFont() throws Exception {
  73. /* If the PDF encoding is correct, a text dump of the generated PDF file contains this (excerpts)
  74. * ...Tm [(PDFE_TEST_MARK_2:) ( ) (This) ( ) (is) ...(acute:) ( ) (XX_\351_XX) ] TJ
  75. * ...Tm [(PDFE_TEST_MARK_3:) ( ) (This) ( ) (is) ...(letter:) ( ) (XX_\342\352\356\364\373_XX) ] TJ
  76. * The following array is used to look for these patterns
  77. */
  78. final String[] testPatterns = {
  79. TEST_MARKER + "1", "Gladiator",
  80. TEST_MARKER + "2", "XX_é_XX",
  81. TEST_MARKER + "3", "XX_âêîôû_XX"
  82. };
  83. runTest("test-custom-font.fo", testPatterns);
  84. }
  85. /**
  86. * Test encoding with a Custom Font using non-BMP characters
  87. *
  88. * @throws Exception
  89. * checkstyle wants a comment here, even a silly one
  90. */
  91. @Test
  92. public void testPDFEncodingWithNonBMPFont() throws Exception {
  93. final String[] testPatterns = {
  94. TEST_MARKER + "1", "AndroidEmoji",
  95. TEST_MARKER + "2", "\uD800\uDF00",
  96. };
  97. runTest("test-custom-non-bmp-font.fo", testPatterns);
  98. }
  99. /** Test encoding using specified input file and test patterns array */
  100. private void runTest(String inputFile, String[] testPatterns)
  101. throws Exception {
  102. File foFile = new File(foBaseDir, inputFile);
  103. byte[] pdfData = convertFO(foFile, getUserAgent(), dumpPDF);
  104. checkEncoding(pdfData, testPatterns);
  105. }
  106. /**
  107. * Check character encodings in the generated PDF data, by reading text
  108. * lines identified by markers and checking their content
  109. *
  110. * @throws IOException
  111. */
  112. private void checkEncoding(byte[] pdf, String[] testPattern)
  113. throws IOException {
  114. String s = extractTextFromPDF(pdf);
  115. int markersFound = 0;
  116. for (String line : s.split("\n")) {
  117. if (!line.contains(TEST_MARKER)) {
  118. continue;
  119. }
  120. markersFound++;
  121. for (int i = 0; i < testPattern.length; i++) {
  122. String marker = testPattern[i];
  123. String pattern = testPattern[++i];
  124. if (!line.contains(marker)) {
  125. continue;
  126. }
  127. String msg = String.format("line containing '%s' must contain '%s'", marker, pattern);
  128. assertTrue(msg, line.contains(pattern));
  129. }
  130. }
  131. final int nMarkers = testPattern.length / 2;
  132. assertEquals(nMarkers + " " + TEST_MARKER + " markers must be found",
  133. nMarkers, markersFound);
  134. }
  135. private static String extractTextFromPDF(byte[] pdfContent) throws IOException {
  136. PDFTextStripper pdfStripper = new PDFTextStripper();
  137. PDDocument pdDoc = PDDocument.load(pdfContent);
  138. return pdfStripper.getText(pdDoc);
  139. }
  140. }