Moving the AFP parser from XGC to FOP for now.

git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@995762 13f79535-47bb-0310-9956-ffa450edef68
author: Jeremias Maerki <jeremias@apache.org> 2010-09-10 11:53:28 +0000
committer: Jeremias Maerki <jeremias@apache.org> 2010-09-10 11:53:28 +0000
commit: 964dd91787738d3bd3f3a57bab9ffe24cdba17e6 (patch)
tree: 1159e20d41096ab8cf804d705d705d96253b4cae /src/java/org
parent: f62155abf5536b893f6bd0d91176301ab5755c2d (diff)
download: xmlgraphics-fop-964dd91787738d3bd3f3a57bab9ffe24cdba17e6.tar.gz
xmlgraphics-fop-964dd91787738d3bd3f3a57bab9ffe24cdba17e6.zip
3 files changed, 514 insertions, 0 deletions
diff --git a/src/java/org/apache/fop/afp/apps/FontPatternExtractor.java b/src/java/org/apache/fop/afp/apps/FontPatternExtractor.java
new file mode 100644
index 000000000..0adffcd8b
--- /dev/null
+++ b/src/java/org/apache/fop/afp/apps/FontPatternExtractor.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.afp.apps;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PrintStream;
+
+import org.apache.commons.io.HexDump;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.output.ByteArrayOutputStream;
+
+import org.apache.fop.afp.parser.MODCAParser;
+import org.apache.fop.afp.parser.UnparsedStructuredField;
+
+/**
+ * This class represents a tool for extracting the Type 1 PFB file from an AFP outline font.
+ */
+public class FontPatternExtractor {
+
+    private PrintStream printStream = System.out;
+
+    /**
+     * Extracts the Type1 PFB file from the given AFP outline font.
+     * @param file the AFP file to read from
+     * @param targetDir the target directory where the PFB file is to be placed.
+     * @throws IOException if an I/O error occurs or the file holds no usable font pattern data
+     */
+    public void extract(File file, File targetDir) throws IOException {
+        InputStream in = new java.io.FileInputStream(file);
+        try {
+            MODCAParser parser = new MODCAParser(in);
+            ByteArrayOutputStream baout = new ByteArrayOutputStream();
+            UnparsedStructuredField strucField;
+            while ((strucField = parser.readNextStructuredField()) != null) {
+                if (strucField.getSfTypeID() == 0xD3EE89) {
+                    println(strucField.toString());
+                    if (strucField.getData().length > 0) { //HexDump.dump() rejects empty arrays
+                        HexDump.dump(strucField.getData(), 0, printStream, 0);
+                        baout.write(strucField.getData());
+                    }
+                }
+            }
+            if (baout.size() == 0) {
+                throw new IOException("No font pattern data (D3EE89) found in " + file);
+            }
+            ByteArrayInputStream bin = new ByteArrayInputStream(baout.toByteArray());
+            DataInputStream din = new DataInputStream(bin);
+            long len = din.readInt() & 0xFFFFFFFFL;
+            println("Length: " + len);
+            din.skipBytes(4); //checksum
+            int tidLen = din.readUnsignedShort() - 2;
+            if (tidLen < 0) {
+                throw new IOException("Invalid TID length: " + (tidLen + 2));
+            }
+            byte[] tid = new byte[tidLen];
+            din.readFully(tid);
+            String filename = new String(tid, "ISO-8859-1");
+            int asciiCount1 = countUSAsciiCharacters(filename);
+            String filenameEBCDIC = new String(tid, "Cp1146");
+            int asciiCount2 = countUSAsciiCharacters(filenameEBCDIC);
+            println("TID: " + filename + " " + filenameEBCDIC);
+            if (asciiCount2 > asciiCount1) {
+                //No indicator found for an EBCDIC-encoded name: pick the more ASCII-like decoding
+                filename = filenameEBCDIC;
+            }
+            if (!filename.toLowerCase().endsWith(".pfb")) {
+                filename = filename + ".pfb";
+            }
+            //The name comes from the font file itself: drop directory parts to stay in targetDir
+            File out = new File(targetDir, new File(filename).getName());
+            println("Output filename: " + out.getName());
+            OutputStream fout = new java.io.FileOutputStream(out);
+            try {
+                IOUtils.copyLarge(din, fout);
+            } finally {
+                IOUtils.closeQuietly(fout);
+            }
+        } finally {
+            IOUtils.closeQuietly(in);
+        }
+    }
+
+    /** Prints the given line to the tool's output stream. */
+    private void println(String s) {
+        printStream.println(s);
+    }
+
+    /** Prints an empty line to the tool's output stream. */
+    private void println() {
+        printStream.println();
+    }
+
+    /** Counts the characters below code 128; used to rate the candidate name decodings. */
+    private int countUSAsciiCharacters(String filename) {
+        int count = 0;
+        for (int i = 0, c = filename.length(); i < c; i++) {
+            if (filename.charAt(i) < 128) {
+                count++;
+            }
+        }
+        return count;
+    }
+
+    /**
+     * Main method. Extracts the PFB file from the font file named by the first argument into
+     * the directory named by the optional second argument, or prints usage help.
+     * @param args the command-line arguments
+     */
+    public static void main(String[] args) {
+        try {
+            FontPatternExtractor app = new FontPatternExtractor();
+            app.println("Font Pattern Extractor");
+            app.println();
+            if (args.length > 0) {
+                File file = new File(args[0]);
+                File targetDir = file.getParentFile();
+                if (args.length > 1) {
+                    targetDir = new File(args[1]);
+                    targetDir.mkdirs();
+                }
+                app.extract(file, targetDir);
+            } else {
+                app.println("This tool tries to extract the PFB file from an AFP outline font.");
+                app.println();
+                app.println("Usage: java -cp ... " + FontPatternExtractor.class.getName()
+                        + " <afp-font-file> [<target-dir>]");
+                System.exit(-1);
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+            System.exit(-1);
+        }
+    }
+
+}
diff --git a/src/java/org/apache/fop/afp/parser/MODCAParser.java b/src/java/org/apache/fop/afp/parser/MODCAParser.java
new file mode 100644
index 000000000..98058a38e
--- /dev/null
+++ b/src/java/org/apache/fop/afp/parser/MODCAParser.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.afp.parser;
+
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * A simple MO:DCA/AFP parser.
+ */
+public class MODCAParser {
+
+    private DataInputStream din;
+
+    /**
+     * Main constructor. The stream is wrapped in a buffered stream if it lacks mark/reset
+     * support, which the parser relies on to peek at the first byte of each field.
+     * @param in the {@link InputStream} to read the AFP file from.
+     */
+    public MODCAParser(InputStream in) {
+        InputStream markable = in.markSupported() ? in : new java.io.BufferedInputStream(in);
+        this.din = new DataInputStream(markable);
+    }
+
+    /**
+     * Returns the {@link DataInputStream} used for parsing structured fields.
+     * @return the data input stream
+     */
+    public DataInputStream getDataInputStream() {
+        return this.din;
+    }
+
+    /**
+     * Reads the next structured field from the input stream. A leading 0x5A byte is consumed
+     * if present; any other leading byte is left in the stream as part of the field itself.
+     * <p>
+     * No structure validation of the MO:DCA file is performed.
+     * @return a new unparsed structured field (or null when parsing is finished).
+     * @throws IOException if an I/O error occurs
+     */
+    public UnparsedStructuredField readNextStructuredField() throws IOException {
+        din.mark(1);
+        int leadByte = din.read(); //0x5A prefixes each field in AFP files
+        if (leadByte < 0) {
+            return null; //end of stream
+        }
+        if (leadByte != 0x5A) {
+            //Plain MO:DCA without the 0x5A prefix: the byte belongs to the field, so un-read it
+            din.reset();
+        }
+        return UnparsedStructuredField.readStructuredField(getDataInputStream());
+    }
+
+}
diff --git a/src/java/org/apache/fop/afp/parser/UnparsedStructuredField.java b/src/java/org/apache/fop/afp/parser/UnparsedStructuredField.java
new file mode 100644
index 000000000..c3dc726d4
--- /dev/null
+++ b/src/java/org/apache/fop/afp/parser/UnparsedStructuredField.java
@@ -0,0 +1,284 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.afp.parser;
+
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.text.DecimalFormat;
+
+import org.apache.commons.io.HexDump;
+
+/**
+ * Represents an unparsed (generic) AFP structured field.
+ */
+public class UnparsedStructuredField {
+
+    private short sfLength;
+    private byte sfClassCode;
+    private byte sfTypeCode;
+    private byte sfCategoryCode;
+    private boolean sfiExtensionPresent;
+    private boolean sfiSegmentedData;
+    private boolean sfiPaddingPresent;
+    private short extLength;
+    private byte[] extData;
+    private byte[] data;
+
+    /** Default constructor. */
+    public UnparsedStructuredField() {
+        //nop
+    }
+
+    /**
+     * Reads a structured field from a {@link DataInputStream}. The resulting object can be
+     * further interpreted by follow-up code.
+     * @param din the stream to read from
+     * @return the generic structured field (or null if the stream ends before the length word)
+     * @throws IOException if an I/O error occurs or the field's length information is invalid
+     */
+    public static UnparsedStructuredField readStructuredField(DataInputStream din)
+            throws IOException {
+        UnparsedStructuredField sf = new UnparsedStructuredField();
+        try {
+            sf.sfLength = din.readShort();
+        } catch (EOFException eof) {
+            return null;
+        }
+        sf.sfClassCode = din.readByte();
+        sf.sfTypeCode = din.readByte();
+        sf.sfCategoryCode = din.readByte();
+        byte f = din.readByte(); //NOTE(review): MO:DCA counts bits from the MSB - verify masks
+        sf.sfiExtensionPresent = (f & 0x01) != 0;
+        sf.sfiSegmentedData = (f & 0x04) != 0;
+        sf.sfiPaddingPresent = (f & 0x10) != 0;
+        din.readShort(); //Reserved; read instead of skip(), which may skip fewer bytes
+
+        int dataLength = sf.sfLength - 8;
+        if (sf.sfiExtensionPresent) {
+            sf.extLength = (short)(((short)din.readByte()) & 0xFF);
+            dataLength -= sf.extLength;
+        }
+        if (dataLength < 0 || (sf.sfiExtensionPresent && sf.extLength < 1)) {
+            throw new IOException("Invalid SF length: " + sf.sfLength + ", ext: " + sf.extLength);
+        }
+        if (sf.sfiExtensionPresent) {
+            sf.extData = new byte[sf.extLength - 1]; //extLength includes its own length byte
+            din.readFully(sf.extData);
+        }
+        sf.data = new byte[dataLength];
+        din.readFully(sf.data);
+        return sf;
+    }
+
+    /** {@inheritDoc} */
+    public String toString() {
+        StringBuffer sb = new StringBuffer("Structured Field: ");
+        sb.append(Integer.toHexString(getSfTypeID()).toUpperCase());
+        sb.append(", len=").append(new DecimalFormat("00000").format(getSfLength()));
+        sb.append(" ").append(getTypeCodeAsString()).append(" ").append(getCategoryCodeAsString());
+        if (isSfiExtensionPresent()) {
+            sb.append(", SFI extension present");
+        }
+        if (isSfiSegmentedData()) {
+            sb.append(", segmented data");
+        }
+        if (isSfiPaddingPresent()) {
+            sb.append(", with padding");
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Dump the structured field as hex data to the given {@link PrintStream}.
+     * @param out the {@link PrintStream} to dump to
+     * @throws IOException if an I/O error occurs
+     */
+    public void dump(PrintStream out) throws IOException {
+        out.println(toString());
+        if (getData().length > 0) { //HexDump.dump() rejects empty arrays
+            HexDump.dump(getData(), 0, out, 0);
+        }
+    }
+
+    /**
+     * Dump the structured field as hex data to <code>System.out</code>.
+     * @throws IOException if an I/O error occurs
+     */
+    public void dump() throws IOException {
+        dump(System.out);
+    }
+
+    /**
+     * Returns type code function name for this field.
+     * @return the type code function name ("Unknown:" plus the hex code if not recognized)
+     */
+    public String getTypeCodeAsString() {
+        switch ((int)getSfTypeCode() & 0xFF) {
+        case 0xA0: return "Attribute";
+        case 0xA2: return "CopyCount";
+        case 0xA6: return "Descriptor";
+        case 0xA7: return "Control";
+        case 0xA8: return "Begin";
+        case 0xA9: return "End";
+        case 0xAB: return "Map";
+        case 0xAC: return "Position";
+        case 0xAD: return "Process";
+        case 0xAF: return "Include";
+        case 0xB0: return "Table";
+        case 0xB1: return "Migration";
+        case 0xB2: return "Variable";
+        case 0xB4: return "Link";
+        case 0xEE: return "Data";
+        default: return "Unknown:" + Integer.toHexString(getSfTypeCode() & 0xFF).toUpperCase();
+        }
+    }
+
+    /**
+     * Returns category code function name for this field.
+     * @return the category code function name ("Unknown:" plus the hex code if not recognized)
+     */
+    public String getCategoryCodeAsString() {
+        switch ((int)getSfCategoryCode() & 0xFF) {
+        case 0x5F: return "Page Segment";
+        case 0x6B: return "Object Area";
+        case 0x77: return "Color Attribute Table";
+        case 0x7B: return "IM Image";
+        case 0x88: return "Medium";
+        case 0x8A: return "Coded Font";
+        case 0x90: return "Process Element";
+        case 0x92: return "Object Container";
+        case 0x9B: return "Presentation Text";
+        case 0xA7: return "Index";
+        case 0xA8: return "Document";
+        case 0xAD: return "Page Group";
+        case 0xAF: return "Page";
+        case 0xBB: return "Graphics";
+        case 0xC3: return "Data Resource";
+        case 0xC4: return "Document Environment Group (DEG)";
+        case 0xC6: return "Resource Group";
+        case 0xC7: return "Object Environment Group (OEG)";
+        case 0xC9: return "Active Environment Group (AEG)";
+        case 0xCC: return "Medium Map";
+        case 0xCD: return "Form Map";
+        case 0xCE: return "Name Resource";
+        case 0xD8: return "Page Overlay";
+        case 0xD9: return "Resource Environment Group (REG)";
+        case 0xDF: return "Overlay";
+        case 0xEA: return "Data Supression";
+        case 0xEB: return "Bar Code";
+        case 0xEE: return "No Operation";
+        case 0xFB: return "Image";
+        default: return "Unknown:" + Integer.toHexString(getSfCategoryCode() & 0xFF).toUpperCase();
+        }
+    }
+
+    /**
+     * Returns the structured field's length.
+     * @return the field length
+     */
+    public short getSfLength() {
+        return sfLength;
+    }
+
+    /**
+     * Returns the structured field's identifier.
+     * @return the field identifier (class, type and category code combined into 24 bits)
+     */
+    public int getSfTypeID() {
+        return ((getSfClassCode() & 0xFF) << 16)
+                | ((getSfTypeCode() & 0xFF) << 8)
+                | (getSfCategoryCode() & 0xFF);
+    }
+
+    /**
+     * Returns the structured field's class code.
+     * @return the field class code
+     */
+    public byte getSfClassCode() {
+        return sfClassCode;
+    }
+
+    /**
+     * Returns the structured field's type code.
+     * @return the type code
+     */
+    public byte getSfTypeCode() {
+        return sfTypeCode;
+    }
+
+    /**
+     * Returns the structured field's category code.
+     * @return the sfCategoryCode
+     */
+    public byte getSfCategoryCode() {
+        return sfCategoryCode;
+    }
+
+    /**
+     * Indicates whether a field introducer extension is present.
+     * @return true if a field introducer extension is present
+     */
+    public boolean isSfiExtensionPresent() {
+        return sfiExtensionPresent;
+    }
+
+    /**
+     * Indicates whether segmented data is present.
+     * @return true if the data is segmented
+     */
+    public boolean isSfiSegmentedData() {
+        return sfiSegmentedData;
+    }
+
+    /**
+     * Indicates whether the data is padded.
+     * @return true if the data is padded
+     */
+    public boolean isSfiPaddingPresent() {
+        return sfiPaddingPresent;
+    }
+
+    /**
+     * Returns the length of the extension if present.
+     * @return the length of the extension (or 0 if no extension is present)
+     */
+    public short getExtLength() {
+        return extLength;
+    }
+
+    /**
+     * Returns the extension data if present.
+     * @return the extension data (or null if no extension is present)
+     */
+    public byte[] getExtData() {
+        return extData;
+    }
+
+    /**
+     * Returns the structured field's payload.
+     * @return the field's data
+     */
+    public byte[] getData() {
+        return data;
+    }
+
+}
author	Jeremias Maerki <jeremias@apache.org>	2010-09-10 11:53:28 +0000
committer	Jeremias Maerki <jeremias@apache.org>	2010-09-10 11:53:28 +0000
commit	964dd91787738d3bd3f3a57bab9ffe24cdba17e6 (patch)
tree	1159e20d41096ab8cf804d705d705d96253b4cae /src/java/org
parent	f62155abf5536b893f6bd0d91176301ab5755c2d (diff)
download	xmlgraphics-fop-964dd91787738d3bd3f3a57bab9ffe24cdba17e6.tar.gz xmlgraphics-fop-964dd91787738d3bd3f3a57bab9ffe24cdba17e6.zip