diff options
author | Jeremias Maerki <jeremias@apache.org> | 2010-09-10 11:53:28 +0000 |
---|---|---|
committer | Jeremias Maerki <jeremias@apache.org> | 2010-09-10 11:53:28 +0000 |
commit | 964dd91787738d3bd3f3a57bab9ffe24cdba17e6 (patch) | |
tree | 1159e20d41096ab8cf804d705d705d96253b4cae /src/java/org | |
parent | f62155abf5536b893f6bd0d91176301ab5755c2d (diff) | |
download | xmlgraphics-fop-964dd91787738d3bd3f3a57bab9ffe24cdba17e6.tar.gz xmlgraphics-fop-964dd91787738d3bd3f3a57bab9ffe24cdba17e6.zip |
Moving the AFP parser from XGC to FOP for now.
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@995762 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java/org')
3 files changed, 514 insertions, 0 deletions
diff --git a/src/java/org/apache/fop/afp/apps/FontPatternExtractor.java b/src/java/org/apache/fop/afp/apps/FontPatternExtractor.java new file mode 100644 index 000000000..0adffcd8b --- /dev/null +++ b/src/java/org/apache/fop/afp/apps/FontPatternExtractor.java @@ -0,0 +1,157 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */

package org.apache.fop.afp.apps;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;

import org.apache.commons.io.HexDump;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.ByteArrayOutputStream;

import org.apache.fop.afp.parser.MODCAParser;
import org.apache.fop.afp.parser.UnparsedStructuredField;

/**
 * This class represents a tool for extracting the Type 1 PFB file from an AFP outline font.
 * <p>
 * The payloads of all structured fields with the ID D3EE89 are concatenated. The resulting
 * byte sequence is read as: a 4-byte length, a 4-byte checksum, a 2-byte TID length (which
 * counts its own two bytes), the TID itself (used as the output file name) and finally the
 * font data which is copied to the output file unchanged.
 */
public class FontPatternExtractor {

    /**
     * ID (class code D3, type code EE, category code 89) of the structured fields whose
     * payload is collected. NOTE(review): presumably the FOCA "Font Patterns" field --
     * confirm against the FOCA reference.
     */
    private static final int FONT_PATTERN_SF_TYPE_ID = 0xD3EE89;

    private PrintStream printStream = System.out;

    /**
     * Extracts the Type1 PFB file from the given AFP outline font.
     * @param file the AFP file to read from
     * @param targetDir the target directory where the PFB file is to be placed.
     * @throws IOException if an I/O error occurs, if the file contains no D3EE89 structured
     *          field or if the collected data is malformed
     */
    public void extract(File file, File targetDir) throws IOException {
        InputStream in = new java.io.FileInputStream(file);
        try {
            byte[] patternData = collectFontPatternData(in);
            if (patternData.length == 0) {
                //Fail with a meaningful message instead of a bare EOFException further down
                throw new IOException("No structured field with ID D3EE89 found in " + file);
            }

            DataInputStream din = new DataInputStream(new ByteArrayInputStream(patternData));
            long len = din.readInt() & 0xFFFFFFFFL;
            println("Length: " + len);
            din.readInt(); //checksum (read rather than skip() so a truncated header is detected)
            int tidLen = din.readUnsignedShort() - 2;
            if (tidLen < 0) {
                throw new IOException("Invalid TID length: " + (tidLen + 2));
            }
            byte[] tid = new byte[tidLen];
            din.readFully(tid);

            String filename = determineFilename(tid);
            println("Output filename: " + filename);
            File out = new File(targetDir, filename);

            OutputStream fout = new java.io.FileOutputStream(out);
            try {
                //Everything after the TID is the actual font data
                IOUtils.copyLarge(din, fout);
            } finally {
                IOUtils.closeQuietly(fout);
            }
        } finally {
            IOUtils.closeQuietly(in);
        }
    }

    /**
     * Parses the stream and concatenates the payloads of all D3EE89 structured fields.
     * Each matching field is also logged and hex-dumped to the print stream.
     * @param in the stream to parse
     * @return the concatenated payloads (empty if no matching field was found)
     * @throws IOException if an I/O error occurs
     */
    private byte[] collectFontPatternData(InputStream in) throws IOException {
        MODCAParser parser = new MODCAParser(in);
        ByteArrayOutputStream baout = new ByteArrayOutputStream();
        UnparsedStructuredField strucField;
        while ((strucField = parser.readNextStructuredField()) != null) {
            if (strucField.getSfTypeID() == FONT_PATTERN_SF_TYPE_ID) {
                println(strucField.toString());
                byte[] payload = strucField.getData();
                if (payload.length > 0) {
                    //HexDump.dump() rejects a start index outside the array,
                    //i.e. it fails for an empty payload.
                    HexDump.dump(payload, 0, printStream, 0);
                }
                baout.write(payload);
            }
        }
        return baout.toByteArray();
    }

    /**
     * Derives the output file name from the raw TID bytes.
     * @param tid the raw TID bytes
     * @return the file name (without any directory part, always ending in ".pfb")
     * @throws IOException if one of the required encodings is not available
     */
    private String determineFilename(byte[] tid) throws IOException {
        String filename = new String(tid, "ISO-8859-1");
        int asciiCount1 = countUSAsciiCharacters(filename);
        String filenameEBCDIC = new String(tid, "Cp1146");
        int asciiCount2 = countUSAsciiCharacters(filenameEBCDIC);
        println("TID: " + filename + " " + filenameEBCDIC);

        if (asciiCount2 > asciiCount1) {
            //Haven't found an indicator if the name is encoded in EBCDIC or not
            //so we use a trick.
            filename = filenameEBCDIC;
        }
        //The name comes from the font file itself: drop any directory part so the
        //output can never be written outside the target directory.
        int lastSeparator = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
        filename = filename.substring(lastSeparator + 1);
        if (!filename.toLowerCase().endsWith(".pfb")) {
            filename = filename + ".pfb";
        }
        return filename;
    }

    private void println(String s) {
        printStream.println(s);
    }

    private void println() {
        printStream.println();
    }

    /**
     * Counts the characters of the given string that lie in the US-ASCII range (below 128).
     * @param filename the string to inspect
     * @return the number of US-ASCII characters
     */
    private int countUSAsciiCharacters(String filename) {
        int count = 0;
        for (int i = 0, c = filename.length(); i < c; i++) {
            if (filename.charAt(i) < 128) {
                count++;
            }
        }
        return count;
    }

    /**
     * Main method. The first argument is the AFP font file, the optional second argument is
     * the target directory (default: the directory of the font file).
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        try {
            FontPatternExtractor app = new FontPatternExtractor();

            app.println("Font Pattern Extractor");
            app.println();

            if (args.length > 0) {
                String filename = args[0];
                File file = new File(filename);

                File targetDir = file.getParentFile();
                if (args.length > 1) {
                    targetDir = new File(args[1]);
                    if (!targetDir.isDirectory() && !targetDir.mkdirs()) {
                        throw new IOException(
                                "Could not create target directory: " + targetDir);
                    }
                }

                app.extract(file, targetDir);
            } else {
                app.println("This tool tries to extract the PFB file from an AFP outline font.");
                app.println();
                app.println("Usage: java -cp ... " + FontPatternExtractor.class.getName()
                        + " <afp-font-file> [<target-dir>]");
                System.exit(-1);
            }

        } catch (Exception e) {
            e.printStackTrace();
            System.exit(-1);
        }
    }

}
diff --git a/src/java/org/apache/fop/afp/parser/MODCAParser.java b/src/java/org/apache/fop/afp/parser/MODCAParser.java new file mode 100644 index 000000000..98058a38e --- /dev/null +++ b/src/java/org/apache/fop/afp/parser/MODCAParser.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.afp.parser; + +import java.io.DataInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +/** + * An simple MO:DCA/AFP parser. + */ +public class MODCAParser { + + private DataInputStream din; + + /** + * Main constructor + * @param in the {@link InputStream} to read the AFP file from. + */ + public MODCAParser(InputStream in) { + if (!in.markSupported()) { + in = new java.io.BufferedInputStream(in); + } + this.din = new DataInputStream(in); + } + + /** + * Returns the {@link DataInputStream} used for parsing structured fields. + * @return the data input stream + */ + public DataInputStream getDataInputStream() { + return this.din; + } + + /** + * Reads the next structured field from the input stream. + * <p> + * No structure validation of the MO:DCA file is performed. + * @return a new unparsed structured field (or null when parsing is finished). + * @throws IOException if an I/O error occurs + */ + public UnparsedStructuredField readNextStructuredField() throws IOException { + din.mark(1); + try { + byte b = din.readByte(); //Skip 0x5A character if necessary (ex. 
AFP) + if (b != 0x5A) { + din.reset(); //Not necessary for MO:DCA files + } + } catch (EOFException eof) { + return null; + } + return UnparsedStructuredField.readStructuredField(getDataInputStream()); + } + +} diff --git a/src/java/org/apache/fop/afp/parser/UnparsedStructuredField.java b/src/java/org/apache/fop/afp/parser/UnparsedStructuredField.java new file mode 100644 index 000000000..c3dc726d4 --- /dev/null +++ b/src/java/org/apache/fop/afp/parser/UnparsedStructuredField.java @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.afp.parser; + +import java.io.DataInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.PrintStream; +import java.text.DecimalFormat; + +import org.apache.commons.io.HexDump; + +/** + * Represents an unparsed (generic) AFP structured field. 
+ */ +public class UnparsedStructuredField { + + private short sfLength; + private byte sfClassCode; + private byte sfTypeCode; + private byte sfCategoryCode; + private boolean sfiExtensionPresent; + private boolean sfiSegmentedData; + private boolean sfiPaddingPresent; + private short extLength; + private byte[] extData; + private byte[] data; + + /** + * Default constructor. + */ + public UnparsedStructuredField() { + //nop + } + + /** + * Reads a structured field from a {@link DataInputStream}. The resulting object can be + * further interpreted be follow-up code. + * @param din the stream to read from + * @return the generic structured field + * @throws IOException if an I/O error occurs + */ + public static UnparsedStructuredField readStructuredField(DataInputStream din) + throws IOException { + short len; + try { + len = din.readShort(); + } catch (EOFException eof) { + return null; + } + UnparsedStructuredField sf = new UnparsedStructuredField(); + sf.sfLength = len; + sf.sfClassCode = din.readByte(); + sf.sfTypeCode = din.readByte(); + sf.sfCategoryCode = din.readByte(); + + byte f = din.readByte(); + sf.sfiExtensionPresent = (f & 0x01) != 0; + sf.sfiSegmentedData = (f & 0x04) != 0; + sf.sfiPaddingPresent = (f & 0x10) != 0; + din.skip(2); //Reserved + + int dataLength = sf.sfLength - 8; + if (sf.sfiExtensionPresent) { + sf.extLength = (short)(((short)din.readByte()) & 0xFF); + sf.extData = new byte[sf.extLength - 1]; + din.readFully(sf.extData); + dataLength -= sf.extLength; + } + sf.data = new byte[dataLength]; + din.readFully(sf.data); + return sf; + } + + /** {@inheritDoc} */ + public String toString() { + StringBuffer sb = new StringBuffer("Structured Field: "); + sb.append(Integer.toHexString(getSfTypeID()).toUpperCase()); + sb.append(", len="); + sb.append(new DecimalFormat("00000").format(getSfLength())); + sb.append(" ").append(getTypeCodeAsString()); + sb.append(" ").append(getCategoryCodeAsString()); + if (isSfiExtensionPresent()) { + sb.append(", 
SFI extension present"); + } + if (isSfiSegmentedData()) { + sb.append(", segmented data"); + } + if (isSfiPaddingPresent()) { + sb.append(", with padding"); + } + return sb.toString(); + } + + /** + * Dump the structured field as hex data to the given {@link PrintStream}. + * @param out the {@link PrintStream} to dump to + * @throws IOException if an I/O error occurs + */ + public void dump(PrintStream out) throws IOException { + out.println(toString()); + HexDump.dump(getData(), 0, out, 0); + } + + /** + * Dump the structured field as hex data to <code>System.out</code>. + * @throws IOException if an I/O error occurs + */ + public void dump() throws IOException { + dump(System.out); + } + + /** + * Returns type code function name for this field. + * @return the type code function name + */ + public String getTypeCodeAsString() { + switch ((int)getSfTypeCode() & 0xFF) { + case 0xA0: return "Attribute"; + case 0xA2: return "CopyCount"; + case 0xA6: return "Descriptor"; + case 0xA7: return "Control"; + case 0xA8: return "Begin"; + case 0xA9: return "End"; + case 0xAB: return "Map"; + case 0xAC: return "Position"; + case 0xAD: return "Process"; + case 0xAF: return "Include"; + case 0xB0: return "Table"; + case 0xB1: return "Migration"; + case 0xB2: return "Variable"; + case 0xB4: return "Link"; + case 0xEE: return "Data"; + default: return "Unknown:" + Integer.toHexString((int)getSfTypeCode()).toUpperCase(); + } + } + + /** + * Returns category code function name for this field. 
+ * @return the category code function name + */ + public String getCategoryCodeAsString() { + switch ((int)getSfCategoryCode() & 0xFF) { + case 0x5F: return "Page Segment"; + case 0x6B: return "Object Area"; + case 0x77: return "Color Attribute Table"; + case 0x7B: return "IM Image"; + case 0x88: return "Medium"; + case 0x8A: return "Coded Font"; + case 0x90: return "Process Element"; + case 0x92: return "Object Container"; + case 0x9B: return "Presentation Text"; + case 0xA7: return "Index"; + case 0xA8: return "Document"; + case 0xAD: return "Page Group"; + case 0xAF: return "Page"; + case 0xBB: return "Graphics"; + case 0xC3: return "Data Resource"; + case 0xC4: return "Document Environment Group (DEG)"; + case 0xC6: return "Resource Group"; + case 0xC7: return "Object Environment Group (OEG)"; + case 0xC9: return "Active Environment Group (AEG)"; + case 0xCC: return "Medium Map"; + case 0xCD: return "Form Map"; + case 0xCE: return "Name Resource"; + case 0xD8: return "Page Overlay"; + case 0xD9: return "Resource Environment Group (REG)"; + case 0xDF: return "Overlay"; + case 0xEA: return "Data Supression"; + case 0xEB: return "Bar Code"; + case 0xEE: return "No Operation"; + case 0xFB: return "Image"; + default: return "Unknown:" + Integer.toHexString((int)getSfTypeCode()).toUpperCase(); + } + } + + /** + * Returns the structured field's length. + * @return the field length + */ + public short getSfLength() { + return sfLength; + } + + /** + * Returns the structured field's identifier. + * @return the field identifier + */ + public int getSfTypeID() { + return ((getSfClassCode() & 0xFF) << 16) + | ((getSfTypeCode() & 0xFF) << 8) + | (getSfCategoryCode() & 0xFF); + } + + /** + * Returns the structured field's class code. + * @return the field class code + */ + public byte getSfClassCode() { + return sfClassCode; + } + + /** + * Returns the structured field's type code. 
+ * @return the type code + */ + public byte getSfTypeCode() { + return sfTypeCode; + } + + /** + * Returns the structured field's category code. + * @return the sfCategoryCode + */ + public byte getSfCategoryCode() { + return sfCategoryCode; + } + + /** + * Indicates whether an field introducer extension is present. + * @return true if an field introducer extension is present + */ + public boolean isSfiExtensionPresent() { + return sfiExtensionPresent; + } + + /** + * Indicates whether segmented data is present. + * @return true if the data is segmented + */ + public boolean isSfiSegmentedData() { + return sfiSegmentedData; + } + + /** + * Indicates whether the data is padded. + * @return true if the data is padded + */ + public boolean isSfiPaddingPresent() { + return sfiPaddingPresent; + } + + /** + * Returns the length of the extension if present. + * @return the length of the extension (or 0 if no extension is present) + */ + public short getExtLength() { + return extLength; + } + + /** + * Returns the extension data if present. + * @return the extension data (or null if no extension is present) + */ + public byte[] getExtData() { + return extData; + } + + /** + * Returns the structured field's payload. + * @return the field's data + */ + public byte[] getData() { + return data; + } + +} |