diff options
author | Jeremias Maerki <jeremias@apache.org> | 2006-06-20 14:57:44 +0000 |
---|---|---|
committer | Jeremias Maerki <jeremias@apache.org> | 2006-06-20 14:57:44 +0000 |
commit | a85bb5953af9a945b18880ffd6b8b0f5200ed447 (patch) | |
tree | 905aabf5d6b39b57135339dc137ffd73656df2d0 /src/java/org/apache/fop/pdf/PDFMetadata.java | |
parent | ecd9851991497df7b4bb2c9f1d72c66d6ac71a29 (diff) | |
download | xmlgraphics-fop-a85bb5953af9a945b18880ffd6b8b0f5200ed447.tar.gz xmlgraphics-fop-a85bb5953af9a945b18880ffd6b8b0f5200ed447.zip |
XML Graphics Commons 1.0 replaced with a SVN snapshot (containing the XMP framework).
Reworked the XMP support for PDF to use the XMP framework from XML Graphics Commons.
XMP metadata embedded in fo:declarations is now properly handled. Its values are copied to the Info object according to the rules from PDF/A-1.
Metadata values from the user agent (title, author etc.) are properly merged with metadata from the XSL-FO document. UserAgent metadata overrides FO metadata.
Note: This is useful and active even if you don't activate PDF/A support.
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@415688 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java/org/apache/fop/pdf/PDFMetadata.java')
-rw-r--r-- | src/java/org/apache/fop/pdf/PDFMetadata.java | 275 |
1 files changed, 99 insertions, 176 deletions
diff --git a/src/java/org/apache/fop/pdf/PDFMetadata.java b/src/java/org/apache/fop/pdf/PDFMetadata.java index 94e31372d..5a4d20d5d 100644 --- a/src/java/org/apache/fop/pdf/PDFMetadata.java +++ b/src/java/org/apache/fop/pdf/PDFMetadata.java @@ -20,25 +20,23 @@ package org.apache.fop.pdf; import java.io.IOException; import java.io.OutputStream; -import java.text.DateFormat; -import java.text.SimpleDateFormat; -import java.util.Calendar; import java.util.Date; -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; -import org.apache.fop.fo.ElementMapping; -import org.apache.fop.fo.extensions.xmp.XMPConstants; +import org.apache.xmlgraphics.xmp.Metadata; +import org.apache.xmlgraphics.xmp.XMPSerializer; +import org.apache.xmlgraphics.xmp.schemas.DublinCoreAdapter; +import org.apache.xmlgraphics.xmp.schemas.DublinCoreSchema; +import org.apache.xmlgraphics.xmp.schemas.XMPBasicAdapter; +import org.apache.xmlgraphics.xmp.schemas.XMPBasicSchema; +import org.apache.xmlgraphics.xmp.schemas.pdf.AdobePDFAdapter; +import org.apache.xmlgraphics.xmp.schemas.pdf.AdobePDFSchema; +import org.apache.xmlgraphics.xmp.schemas.pdf.PDFAAdapter; +import org.apache.xmlgraphics.xmp.schemas.pdf.PDFAOldXMPSchema; +import org.apache.xmlgraphics.xmp.schemas.pdf.PDFAXMPSchema; -import org.w3c.dom.DOMImplementation; -import org.w3c.dom.Document; -import org.w3c.dom.Element; +import org.xml.sax.SAXException; /** * Special PDFStream for Metadata. @@ -46,20 +44,15 @@ import org.w3c.dom.Element; */ public class PDFMetadata extends PDFStream { - private static final String XMLNS = "http://www.w3.org/2000/xmlns/"; - - private static DateFormat pseudoISO8601DateFormat = new SimpleDateFormat( - "yyyy'-'MM'-'dd'T'HH':'mm':'ss"); - - private Document xmpMetadata; + private Metadata xmpMetadata; private boolean readOnly = true; /** @see org.apache.fop.pdf.PDFObject#PDFObject() */ - public PDFMetadata(Document xmp, boolean readOnly) { + public PDFMetadata(Metadata xmp, boolean readOnly) { super(); if (xmp == null) { throw new NullPointerException( - "DOM Document representing the metadata must no be null"); + "The parameter for the XMP Document must not be null"); } this.xmpMetadata = xmp; this.readOnly = readOnly; @@ -80,6 +73,11 @@ public class PDFMetadata extends PDFStream { return false; //XMP metadata packet must be scannable by non PDF-compatible readers } + /** @return the XMP metadata */ + public Metadata getMetadata() { + return this.xmpMetadata; + } + /** * overload the base object method so we don't have to copy * byte arrays around so much @@ -94,39 +92,14 @@ public class PDFMetadata extends PDFStream { /** @see org.apache.fop.pdf.AbstractPDFStream#outputRawStreamData(java.io.OutputStream) */ protected void outputRawStreamData(OutputStream out) throws IOException { - final String encoding = "UTF-8"; - out.write("<?xpacket begin=\"\uFEFF\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" - .getBytes(encoding)); try { - TransformerFactory tFactory = TransformerFactory.newInstance(); - Transformer transformer = tFactory.newTransformer(); - transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); - transformer.setOutputProperty(OutputKeys.ENCODING, encoding); - transformer.setOutputProperty(OutputKeys.INDENT, "no"); - DOMSource src = new DOMSource(this.xmpMetadata); - StreamResult res = new StreamResult(out); - transformer.transform(src, res); - } catch (TransformerConfigurationException e) { + XMPSerializer.writeXMPPacket(xmpMetadata, out, this.readOnly); + } catch (TransformerConfigurationException tce) { throw new IOException("Error setting up Transformer for XMP stream serialization: " - + e.getMessage()); - } catch (TransformerException e) { + + tce.getMessage()); + } catch (SAXException saxe) { throw new IOException("Error while serializing XMP stream: " - + e.getMessage()); - } - if (readOnly) { - out.write("\n<?xpacket end=\"r\"?>".getBytes(encoding)); - } else { - //Create padding string (40 * 101 characters is more or less the recommended 4KB) - StringBuffer sb = new StringBuffer(101); - sb.append('\n'); - for (int i = 0; i < 100; i++) { - sb.append(" "); - } - byte[] padding = sb.toString().getBytes(encoding); - for (int i = 0; i < 40; i++) { - out.write(padding); - } - out.write("\n<?xpacket end=\"w\"?>".getBytes(encoding)); + + saxe.getMessage()); } } @@ -150,58 +123,15 @@ public class PDFMetadata extends PDFStream { } /** - * Formats a Date using ISO 8601 format in the default time zone. - * @param dt the date - * @return the formatted date - */ - public static String formatISO8601Date(Date dt) { - //ISO 8601 cannot be expressed directly using SimpleDateFormat - StringBuffer sb = new StringBuffer(pseudoISO8601DateFormat.format(dt)); - Calendar cal = Calendar.getInstance(); - cal.setTime(dt); - int offset = cal.get(Calendar.ZONE_OFFSET); - offset += cal.get(Calendar.DST_OFFSET); - offset /= (1000 * 60); //Convert to minutes - - if (offset == 0) { - sb.append('Z'); - } else { - int zoh = offset / 60; - int zom = Math.abs(offset % 60); - if (zoh > 0) { - sb.append('+'); - } else { - sb.append('-'); - } - if (zoh < 10) { - sb.append('0'); - } - sb.append(zoh); - sb.append(':'); - if (zom < 10) { - sb.append('0'); - } - sb.append(zom); - } - - return sb.toString(); - } - - /** * Creates an XMP document based on the settings on the PDF Document. * @param pdfDoc the PDF Document - * @return a DOM document representing the requested XMP metadata + * @return the requested XMP metadata */ - public static Document createXMPFromUserAgent(PDFDocument pdfDoc) { - DOMImplementation domImplementation = ElementMapping.getDefaultDOMImplementation(); - Document doc = domImplementation.createDocument( - XMPConstants.XMP_NAMESPACE, "x:xmpmeta", null); - Element rdf = doc.createElementNS(XMPConstants.RDF_NAMESPACE, "rdf:RDF"); - doc.getDocumentElement().appendChild(rdf); + public static Metadata createXMPFromUserAgent(PDFDocument pdfDoc) { + Metadata meta = new Metadata(); - Element desc, el; PDFInfo info = pdfDoc.getInfo(); - + //Set creation date if not available, yet if (info.getCreationDate() == null) { Date d = new Date(); @@ -213,105 +143,98 @@ public class PDFMetadata extends PDFStream { //error even if the times are essentially equal. //Dublin Core - desc = doc.createElementNS(XMPConstants.RDF_NAMESPACE, "rdf:Description"); - desc.setAttributeNS(XMPConstants.RDF_NAMESPACE, "rdf:about", ""); - desc.setAttributeNS(XMLNS, "xmlns:dc", XMPConstants.DUBLIN_CORE_NAMESPACE); - rdf.appendChild(desc); + DublinCoreAdapter dc = DublinCoreSchema.getAdapter(meta); if (info.getAuthor() != null) { - el = doc.createElementNS(XMPConstants.DUBLIN_CORE_NAMESPACE, "dc:creator"); - desc.appendChild(el); - Element seq = doc.createElementNS(XMPConstants.RDF_NAMESPACE, "rdf:Seq"); - el.appendChild(seq); - Element li = doc.createElementNS(XMPConstants.RDF_NAMESPACE, "rdf:li"); - seq.appendChild(li); - li.appendChild(doc.createTextNode(info.getAuthor())); + dc.addCreator(info.getAuthor()); } if (info.getTitle() != null) { - el = doc.createElementNS(XMPConstants.DUBLIN_CORE_NAMESPACE, "dc:title"); - desc.appendChild(el); - el.appendChild(doc.createTextNode(info.getTitle())); + dc.setTitle(info.getTitle()); } if (info.getSubject() != null) { - el = doc.createElementNS(XMPConstants.DUBLIN_CORE_NAMESPACE, "dc:subject"); - desc.appendChild(el); - el.appendChild(doc.createTextNode(info.getSubject())); + dc.addSubject(info.getSubject()); + } + dc.addDate(info.getCreationDate()); + + //PDF/A identification + PDFAMode pdfaMode = pdfDoc.getProfile().getPDFAMode(); + if (pdfaMode.isPDFA1LevelB()) { + PDFAAdapter pdfa = PDFAXMPSchema.getAdapter(meta); + //Create the identification a second time with the old namespace to keep + //Adobe Acrobat happy + PDFAAdapter pdfaOld = PDFAOldXMPSchema.getAdapter(meta); + pdfa.setPart(1); + pdfaOld.setPart(1); + if (pdfaMode == PDFAMode.PDFA_1A) { + pdfa.setConformance("A"); //PDF/A-1a + pdfaOld.setConformance("A"); //PDF/A-1a + } else { + pdfa.setConformance("B"); //PDF/A-1b + pdfaOld.setConformance("B"); //PDF/A-1b + } } - el = doc.createElementNS(XMPConstants.DUBLIN_CORE_NAMESPACE, "dc:date"); - desc.appendChild(el); - el.appendChild(doc.createTextNode(formatISO8601Date(info.getCreationDate()))); //XMP Basic Schema - desc = doc.createElementNS(XMPConstants.RDF_NAMESPACE, "rdf:Description"); - desc.setAttributeNS(XMPConstants.RDF_NAMESPACE, "rdf:about", ""); - desc.setAttributeNS(XMLNS, "xmlns:xmp", XMPConstants.XMP_BASIC_NAMESPACE); - rdf.appendChild(desc); - el = doc.createElementNS(XMPConstants.XMP_BASIC_NAMESPACE, "xmp:CreateDate"); - desc.appendChild(el); - el.appendChild(doc.createTextNode(formatISO8601Date(info.getCreationDate()))); + XMPBasicAdapter xmpBasic = XMPBasicSchema.getAdapter(meta); + xmpBasic.setCreateDate(info.getCreationDate()); PDFProfile profile = pdfDoc.getProfile(); if (profile.isModDateRequired()) { - el = doc.createElementNS(XMPConstants.XMP_BASIC_NAMESPACE, "xmp:ModifyDate"); - desc.appendChild(el); - el.appendChild(doc.createTextNode(formatISO8601Date(info.getCreationDate()))); + xmpBasic.setModifyDate(info.getCreationDate()); } if (info.getCreator() != null) { - el = doc.createElementNS(XMPConstants.XMP_BASIC_NAMESPACE, "xmp:CreatorTool"); - desc.appendChild(el); - el.appendChild(doc.createTextNode(info.getCreator())); + xmpBasic.setCreatorTool(info.getCreator()); } - - //Adobe PDF Schema - desc = doc.createElementNS(XMPConstants.RDF_NAMESPACE, "rdf:Description"); - desc.setAttributeNS(XMPConstants.RDF_NAMESPACE, "rdf:about", ""); - desc.setAttributeNS(XMLNS, "xmlns:pdf", XMPConstants.ADOBE_PDF_NAMESPACE); - rdf.appendChild(desc); + + AdobePDFAdapter adobePDF = AdobePDFSchema.getAdapter(meta); if (info.getKeywords() != null) { - el = doc.createElementNS(XMPConstants.ADOBE_PDF_NAMESPACE, "pdf:Keywords"); - desc.appendChild(el); - el.appendChild(doc.createTextNode(info.getKeywords())); + adobePDF.setKeywords(info.getKeywords()); } if (info.getProducer() != null) { - el = doc.createElementNS(XMPConstants.ADOBE_PDF_NAMESPACE, "pdf:Producer"); - desc.appendChild(el); - el.appendChild(doc.createTextNode(info.getProducer())); + adobePDF.setProducer(info.getProducer()); } - el = doc.createElementNS(XMPConstants.ADOBE_PDF_NAMESPACE, "pdf:PDFVersion"); - desc.appendChild(el); - el.appendChild(doc.createTextNode(pdfDoc.getPDFVersionString())); + adobePDF.setPDFVersion(pdfDoc.getPDFVersionString()); - //PDF/A identification - PDFAMode pdfaMode = pdfDoc.getProfile().getPDFAMode(); - if (pdfaMode.isPDFA1LevelB()) { - createPDFAIndentification(doc, rdf, - XMPConstants.PDF_A_IDENTIFICATION, "pdfaid", pdfaMode); - //Create the identification a second time with the old namespace to keep - //Adobe Acrobat happy - createPDFAIndentification(doc, rdf, - XMPConstants.PDF_A_IDENTIFICATION_OLD, "pdfaid_1", pdfaMode); - } - return doc; + return meta; } - private static void createPDFAIndentification(Document doc, Element rdf, - String pdfaNamespace, String prefix, PDFAMode pdfaMode) { - Element desc; - Element el; - desc = doc.createElementNS(XMPConstants.RDF_NAMESPACE, "rdf:Description"); - desc.setAttributeNS(XMPConstants.RDF_NAMESPACE, "rdf:about", ""); - desc.setAttributeNS(XMLNS, "xmlns:" + prefix, pdfaNamespace); - rdf.appendChild(desc); - el = doc.createElementNS(pdfaNamespace, prefix + ":part"); - desc.appendChild(el); - el.appendChild(doc.createTextNode("1")); //PDF/A-1 - el = doc.createElementNS(pdfaNamespace, prefix + ":conformance"); - desc.appendChild(el); - if (pdfaMode == PDFAMode.PDFA_1A) { - el.appendChild(doc.createTextNode("A")); //PDF/A-1a + /** + * Updates the values in the Info object from the XMP metadata according to the rules defined + * in PDF/A-1 (ISO 19005-1:2005) + * @param meta the metadata + * @param info the Info object + */ + public static void updateInfoFromMetadata(Metadata meta, PDFInfo info) { + DublinCoreAdapter dc = DublinCoreSchema.getAdapter(meta); + info.setTitle(dc.getTitle()); + String[] creators = dc.getCreators(); + if (creators != null && creators.length > 0) { + info.setAuthor(creators[0]); } else { - el.appendChild(doc.createTextNode("B")); //PDF/A-1b + info.setAuthor(null); } + String[] subjects = dc.getSubjects(); + //PDF/A-1 defines dc:subject as "Text" but XMP defines it as "bag Text". + //We're simply doing the inverse from createXMPFromUserAgent() above. + if (subjects != null && subjects.length > 0) { + info.setSubject(subjects[0]); + } else { + info.setSubject(null); + } + + AdobePDFAdapter pdf = AdobePDFSchema.getAdapter(meta); + info.setKeywords(pdf.getKeywords()); + info.setProducer(pdf.getProducer()); + + XMPBasicAdapter xmpBasic = XMPBasicSchema.getAdapter(meta); + info.setCreator(xmpBasic.getCreatorTool()); + Date d; + d = xmpBasic.getCreateDate(); + xmpBasic.setCreateDate(d); //To make Adobe Acrobat happy (bug filed with Adobe) + //Adobe Acrobat doesn't like it when the xmp:CreateDate has a different timezone + //than Info/CreationDate + info.setCreationDate(d); + d = xmpBasic.getModifyDate(); + xmpBasic.setModifyDate(d); + info.setModDate(d); } - - } |