mirrors
/
fop
mirror of https://github.com/apache/fop.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655
							/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */

package org.apache.fop.fonts.type1;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Scanner;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class PostscriptParser {

    /** Logger for this parser; used to warn about duplicate elements. */
    protected static final Log LOG = LogFactory.getLog(PostscriptParser.class);
    /* Patterns used to identify Postscript elements */
    /* Token that makes createElement build a PSDictionary (except for /Private) */
    private static final String DICTIONARY = "dict";
    /* Token that makes createElement build a PSFixedArray */
    private static final String FIXED_ARRAY = "array";
    /* Token that makes createElement build a PSVariableArray */
    private static final String VARIABLE_ARRAY = "[";
    /* Token that makes createElement build a PSSubroutine */
    private static final String SUBROUTINE = "{";
    /* A map of parsed subroutines keyed by their operator, so if they are
     * encountered during the parsing phase of another element, they can be read
     * and pattern matched. */
    private HashMap<String, PSSubroutine> subroutines = new HashMap<String, PSSubroutine>();

    /**
     * Parses the postscript document and returns a list of elements.
     * <p>
     * The data is tokenized byte by byte. A token starting with '/' becomes the
     * current operator; when a following token matches one of the element patterns
     * (see {@link #createElement(String, String, int)}) an element is created and
     * all subsequent bytes are handed to that element until it reports that it has
     * no more data to read. A finished element is only stored once the next token
     * after it is processed. Simple <code>/name value def</code> sequences are
     * stored as {@link PSVariable}s when the next operator is encountered.
     * </p>
     * @param segment The byte array containing the postscript data
     * @return A list of found Postscript elements
     * @throws IOException if an element fails to decode one of its tokens
     */
    public List<PSElement> parse(byte[] segment) throws IOException {
        List<PSElement> parsedElements = new ArrayList<PSElement>();
        /* Currently only scan and store the top level element. For deeper
         * Postscript parsing you can push and pop elements from a stack */
        PSElement foundElement = null;
        String operator = null;
        StringBuilder token = new StringBuilder();
        List<String> tokens = new ArrayList<String>();
        int startPoint = -1;
        boolean specialDelimiter = false;
        boolean lastWasSpecial = false;
        for (int i = 0; i < segment.length; i++) {
            byte cur = segment[i];
            if (foundElement != null && foundElement.hasMore()) {
                //An open element tokenizes its own content
                foundElement.parse(cur, i);
                continue;
            } else {
                char c = (char)cur;
                if (!lastWasSpecial) {
                    //A '/' only delimits when it directly follows another token
                    specialDelimiter = (c == '{' || c == '}' || c == '[' || c == ']'
                            || (!token.toString().equals("") && c == '/'));
                    //Horizontal tab is PostScript white space as well; the remaining
                    //codes are kept exactly as they were for backward compatibility
                    boolean isNotBreak = !(c == ' ' || c == '\t' || c == '\r' || cur == 15
                            || cur == 12 || cur == 10);
                    if (isNotBreak && !specialDelimiter) {
                        token.append(c);
                        continue;
                    }
                } else {
                    //Second visit of a special character (see the retrace below)
                    lastWasSpecial = false;
                    token.append(c);
                    if (token.toString().equals("/")) {
                        //Start of a name: keep collecting its characters
                        continue;
                    }
                }
            }
            try {
                boolean setOp = false;
                //A name that directly follows the operator (tokens.size() == 1) is
                //treated as its value and not as a new operator
                if ((foundElement == null || !foundElement.hasMore()) && token.length() > 1
                        && token.charAt(0) == '/' && tokens.size() != 1 || hasEndToken(token.toString())) {
                    operator = token.toString();
                    setOp = true;
                    //Empty tokens produced by consecutive breaks (e.g. the LF of a CRLF
                    //line ending or a trailing blank) must not hide the closing "def"
                    if (tokens.size() > 2 && lastNonEmptyToken(tokens).equals("def")) {
                        PSVariable newVar = new PSVariable(tokens.get(0), startPoint);
                        newVar.setValue(tokens.get(1));
                        newVar.setEndPoint(i - operator.length());
                        parsedElements.add(newVar);
                    }
                    tokens.clear();
                    startPoint = i - token.length();
                }
                if (operator != null) {
                    if (foundElement instanceof PSSubroutine) {
                        PSSubroutine sub = (PSSubroutine)foundElement;
                        //Remember the subroutine so that other elements can look it up
                        subroutines.put(sub.getOperator(), sub);
                        parsedElements.add(sub);
                        if (!setOp) {
                            operator = "";
                        }
                    } else {
                        if (foundElement != null) {
                            if (!hasMatch(foundElement.getOperator(), parsedElements)) {
                                parsedElements.add(foundElement);
                            } else {
                                LOG.warn("Duplicate " + foundElement.getOperator()
                                        + " in font file, Ignoring.");
                            }
                        }
                    }
                    //Compare token against patterns and create an element if matched
                    foundElement = createElement(operator, token.toString(), startPoint);
                }
            } finally {
                //Always start a fresh token, even if processing the current one failed
                tokens.add(token.toString());
                token = new StringBuilder();
                if (specialDelimiter) {
                    specialDelimiter = false;
                    lastWasSpecial = true;
                    //Retrace special postscript character so it can be processed separately
                    i--;
                }
            }
        }
        return parsedElements;
    }

    /**
     * Returns the last token that is not empty.
     * @param tokens The tokens collected since the current operator
     * @return The last non-empty token or an empty string if there is none
     */
    private static String lastNonEmptyToken(List<String> tokens) {
        for (int idx = tokens.size() - 1; idx >= 0; idx--) {
            String candidate = tokens.get(idx);
            if (candidate.length() > 0) {
                return candidate;
            }
        }
        return "";
    }

    /**
     * Tests whether the token is the one that is handled like a new operator
     * although it is not a name.
     * @param token The token to test
     * @return true if the token is "currentdict"
     */
    private boolean hasEndToken(String token) {
        final String endMarker = "currentdict";
        boolean isEndMarker = token.equals(endMarker);
        return isEndMarker;
    }

    /**
     * Tests whether an element with the given operator is already contained in the list.
     * @param operator The operator to look for
     * @param elements The elements to search
     * @return true if any of the elements has an equal operator
     */
    private boolean hasMatch(String operator, List<PSElement> elements) {
        int total = elements.size();
        for (int idx = 0; idx < total; idx++) {
            String candidate = elements.get(idx).getOperator();
            if (candidate.equals(operator)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Creates the element which corresponds to the given pattern token.
     * @param operator The operator the new element is identified by
     * @param elementID The token which is compared against the element patterns
     * @param startPoint The start location passed on to the new element
     * @return The new element, or null if the operator is empty, the token matches
     * no pattern or the token denotes the dictionary of the /Private operator
     */
    public PSElement createElement(String operator, String elementID, int startPoint) {
        PSElement created = null;
        if (!operator.equals("")) {
            if (elementID.equals(FIXED_ARRAY)) {
                created = new PSFixedArray(operator, startPoint);
            } else if (elementID.equals(VARIABLE_ARRAY)) {
                created = new PSVariableArray(operator, startPoint);
            } else if (elementID.equals(SUBROUTINE)) {
                created = new PSSubroutine(operator, startPoint);
            } else if (!operator.equals("/Private") && elementID.equals(DICTIONARY)) {
                created = new PSDictionary(operator, startPoint);
            }
        }
        return created;
    }

    /**
     * A base Postscript element class. It splits the bytes it is given into tokens
     * and delegates the interpretation of tokens and binary data to its sub classes.
     */
    public abstract class PSElement {
        /* The identifying operator for this element */
        protected String operator;
        /* The bytes of the token which is currently being collected */
        private List<Byte> token;
        /* Determines whether there is any more data to be read whilst parsing */
        protected boolean hasMore = true;
        /* The locations of any entries containing binary data (e.g. arrays) */
        protected LinkedHashMap<String, int[]> binaryEntries;
        /* The tokens parsed from the current element */
        protected List<String> tokens;
        /* Determines whether binary data is currently being read / parsed */
        protected boolean readBinary = false;
        /* The location of the element within the binary data */
        private int startPoint = -1;
        protected int endPoint = -1;
        /* A flag to determine if unexpected postscript has been found in the element */
        private boolean foundUnexpected = false;

        /**
         * Creates a new element.
         * @param operator The operator which identifies the element
         * @param startPoint The start location of the element within the source data
         */
        public PSElement(String operator, int startPoint) {
            this.operator = operator;
            this.startPoint = startPoint;
            token = new ArrayList<Byte>();
            binaryEntries = new LinkedHashMap<String, int[]>();
            tokens = new ArrayList<String>();
        }

        /**
         * Gets the Postscript element operator
         * @return The operator returned as a string
         */
        public String getOperator() {
            return operator;
        }

        /**
         * The start location of the element within the source binary data
         * @return The start location returned as an integer
         */
        public int getStartPoint() {
            return startPoint;
        }

        /**
         * The end location of the element within the source binary data
         * @return The end location returned as an integer, -1 if it has not been set
         */
        public int getEndPoint() {
            return endPoint;
        }

        /**
         * Takes over the task of tokenizing the byte data. While binary data is being
         * read the byte is passed to {@link #parseByte(byte, int)} instead.
         * @param cur The current byte being read
         * @param pos The position of the current byte within the source data
         * @throws UnsupportedEncodingException if the token bytes cannot be decoded
         */
        public void parse(byte cur, int pos) throws UnsupportedEncodingException {
            if (!readBinary) {
                char c = (char)cur;
                boolean specialDelimiter = (c == '{' || c == '}' || c == '[' || c == ']'
                        || c == '(' || c == ')');
                //Horizontal tab is PostScript white space as well; the remaining codes
                //are kept exactly as they were for backward compatibility
                boolean isNotValidBreak = !(c == ' ' || c == '\t' || cur == 15 || cur == 12
                        || c == '\r' || c == 10);
                if (isNotValidBreak && !specialDelimiter) {
                    token.add(cur);
                } else {
                    //Flush what has been collected so far (this may be an empty token)
                    parseToken(pos);
                }
                if (specialDelimiter) {
                    //A special delimiter is passed on as a token of its own
                    token.add(cur);
                    parseToken(pos);
                }
            } else {
                parseByte(cur, pos);
            }
        }

        /**
         * Decodes the collected bytes as ASCII, passes the resulting token to the
         * sub class and starts a new token.
         * @param pos The position of the byte which ended the token
         */
        private void parseToken(int pos) throws UnsupportedEncodingException {
            byte[] bytesToken = new byte[token.size()];
            for (int i = 0; i < token.size(); i++) {
                bytesToken[i] = token.get(i).byteValue();
            }
            parseToken(new String(bytesToken, "ASCII"), pos);
            token.clear();
        }

        /**
         * Passes responsibility for processing the byte stream to the PostScript object
         * @param cur The byte currently being read
         * @param pos The position of the given byte
         */
        public abstract void parseByte(byte cur, int pos);

        /**
         * Delegates the parse routine to a sub class
         * @param token The token which to parse
         * @param curPos The position of the byte which ended the token
         */
        public abstract void parseToken(String token, int curPos);

        /**
         * Tests whether a string can be parsed as an integer.
         * @param intValue The string to test
         * @return true if {@link Integer#parseInt(String)} accepts the string
         */
        protected boolean isInteger(String intValue) {
            try {
                Integer.parseInt(intValue);
                return true;
            } catch (NumberFormatException ex) {
                return false;
            }
        }

        /**
         * Gets the locations of the binary entries in the order in which they were added.
         * @return The map of binary entry locations
         */
        public LinkedHashMap<String, int[]> getBinaryEntries() {
            return binaryEntries;
        }

        /**
         * Gets the binary entry location of a given index from the array
         * @param index The index for which to retrieve the binary data location
         * @return The location of the entry at the given position in insertion order,
         * or an empty array if there is no entry at that position
         */
        public int[] getBinaryEntryByIndex(int index) {
            int count = 0;
            for (Entry<String, int[]> entry : binaryEntries.entrySet()) {
                if (count == index) {
                    return entry.getValue();
                }
                count++;
            }
            return new int[0];
        }

        /**
         * Determines if more data is still to be parsed for the Postscript element.
         * @return Returns true if more data exists
         */
        public boolean hasMore() {
            return hasMore;
        }

        /**
         * Sets a value to be true if an unexpected postscript entry is found in the element.
         * An example is where the encoding table may have a series of postscript operators
         * altering the state of the array. In this case the only option will be to
         * fully embed the font to avoid incorrect encoding in the resulting subset.
         * @param foundUnexpected true if unexpected postscript is found.
         */
        protected void setFoundUnexpected(boolean foundUnexpected) {
            this.foundUnexpected = foundUnexpected;
        }

        /**
         * Returns whether unexpected postscript has been found in the element
         * @return true if unexpected postscript is found
         */
        public boolean getFoundUnexpected() {
            return this.foundUnexpected;
        }
    }

    /**
     * An object representing a Postscript array with a fixed number of entries.
     * Entries are collected from one "dup" token to the next and stored under the
     * integer which follows the first token of the entry.
     */
    public class PSFixedArray extends PSElement {

        /* The text of the entry which is currently being collected */
        private String entry = "";
        /* Accumulates the bytes handed to parseByte while binary data is read */
        private String token = "";
        /* Set once the end of the array definition has been found */
        private boolean finished = false;
        /* The number of binary bytes still to be skipped */
        protected int binaryLength = 0;
        /* A map containing each entry and its contents in the array */
        private HashMap<Integer, String> entries;
        private static final String READ_ONLY = "readonly";

        /**
         * Creates a new fixed array element.
         * @param operator The operator which identifies the array
         * @param startPoint The start location of the array within the source data
         */
        public PSFixedArray(String operator, int startPoint) {
            super(operator, startPoint);
            entries = new HashMap<Integer, String>();
        }

        /**
         * Processes a token of the array definition.
         * @param token The token to process
         * @param curPos The position of the byte which ended the token
         */
        @Override
        public void parseToken(String token, int curPos) {
            if (!checkForEnd(token) || token.equals("def")) {
                hasMore = false;
                endPoint = curPos;
                return;
            }
            if (token.equals("dup")) {
                //A new entry begins; store the previous one if it was a "dup" entry
                if (entry.startsWith("dup")) {
                    addEntry(entry);
                }
                entry = "";
                tokens.clear();
            }
            if (!token.equals(READ_ONLY)) {
                entry += token + " ";
            }
            if (!token.trim().equals("")) {
                tokens.add(token);
            }
            //Four tokens of the form "dup <index> <integer> <operator>" switch to
            //binary mode: the integer is taken as the number of bytes to skip.
            //NOTE(review): presumably Type 1 charstring syntax ("dup 0 15 RD ...")
            if (tokens.size() == 4 && tokens.get(0).equals("dup") && isInteger(tokens.get(2))) {
                binaryLength = Integer.parseInt(tokens.get(2));
                readBinary = true;
            }
        }

        /**
         * Checks whether the token ends the array definition and stores the last
         * entry if it does.
         * @param checkToken The token to check
         * @return false if the token ends the array or the array has already ended
         */
        private boolean checkForEnd(String checkToken) {
            boolean subFound = false;
            //Check for a subroutine matching that of an array end definition
            PSSubroutine sub = subroutines.get("/" + checkToken);
            if (sub != null && sub.getSubroutine().contains("def")) {
                subFound = true;
            }
            if (!finished && (subFound || checkToken.equals("def"))) {
                finished = true;
                addEntry(entry);
                return false;
            } else {
                return !finished;
            }
        }

        /**
         * Gets a map of array entries identified by index
         * @return Returns the map of array entries
         */
        public HashMap<Integer, String> getEntries() {
            return entries;
        }

        /**
         * Stores an entry under the integer which follows its first token. Entries
         * without such an integer mark the element as containing unexpected
         * postscript; entries which consist of blanks only are ignored.
         * @param entry The text of the entry
         */
        private void addEntry(String entry) {
            if (entry.equals("")) {
                return;
            }
            //Separate names which are glued to the preceding token. A slash at the
            //very start has no preceding character which could be inspected.
            int slash = entry.indexOf('/');
            if (slash > 0 && entry.charAt(slash - 1) != ' ') {
                entry = entry.replace("/", " /");
            }
            //Collapse runs of blanks so that the scanner sees no empty tokens
            int entryLen;
            do {
                entryLen = entry.length();
                entry = entry.replace("  ", " ");
            } while (entry.length() != entryLen);
            Scanner s = new Scanner(entry).useDelimiter(" ");
            if (!s.hasNext()) {
                //Only blanks were collected, e.g. for an array without any entries
                return;
            }
            s.next();
            if (s.hasNextInt()) {
                entries.put(s.nextInt(), entry);
            } else {
                //Flag the element instead of failing with a scanner exception
                setFoundUnexpected(true);
            }
        }

        /**
         * Skips binary data and records its location once it has been passed.
         * @param cur The byte currently being read
         * @param pos The position of the given byte
         */
        @Override
        public void parseByte(byte cur, int pos) {
            if (binaryLength > 0) {
                token += (char)cur;
                binaryLength--;
            } else {
                if (readBinary) {
                    //pos is the first byte after the binary data
                    int bLength = Integer.parseInt(tokens.get(2));
                    int start = pos - bLength;
                    int end = start + bLength;
                    binaryEntries.put(tokens.get(1), new int[] {start, end});
                    token = "";
                    readBinary = false;
                } else {
                    tokens.add(token);
                    parseToken(token, pos);
                    token = "";
                }
            }
        }
    }

    /**
     * An object representing a Postscript array with a variable number of entries.
     * The element ends with the first token starting with ']' that is found outside
     * of any braces.
     */
    public class PSVariableArray extends PSElement {
        /* The current nesting depth of braces */
        private int level = 0;
        /* The items which have been completed by a closing brace */
        private List<String> arrayItems;
        /* The text collected since the last completed item */
        private String entry = "";

        /**
         * Creates a new variable array element.
         * @param operator The operator which identifies the array
         * @param startPoint The start location of the array within the source data
         */
        public PSVariableArray(String operator, int startPoint) {
            super(operator, startPoint);
            arrayItems = new ArrayList<String>();
        }

        /**
         * Appends the token to the current item and keeps track of the brace level.
         * @param token The token to process
         * @param curPos The position of the byte which ended the token
         */
        @Override
        public void parseToken(String token, int curPos) {
            entry = entry + token + " ";
            boolean closesArray = level <= 0 && token.length() > 0 && token.charAt(0) == ']';
            if (closesArray) {
                hasMore = false;
                endPoint = curPos;
                return;
            }
            /* Items may be subroutines; the brace level tells where such an item finishes */
            if (token.equals("{")) {
                level++;
                return;
            }
            if (token.equals("}")) {
                level--;
                if (level == 0 && !entry.equals("")) {
                    arrayItems.add(entry);
                    entry = "";
                }
            }
        }

        /**
         * Gets the items which were found within the variable array
         * @return Returns the found array items as a list
         */
        public List<String> getEntries() {
            return arrayItems;
        }

        @Override
        public void parseByte(byte cur, int pos) {
            //Not currently used
        }
    }

    /**
     * An object representing a Postscript subroutine element. The nesting level
     * starts at one and the element ends with the first "def", "ifelse" or token
     * starting with '}' that is found once the level has dropped to zero.
     */
    public class PSSubroutine extends PSElement {
        /* The current nesting depth of braces */
        private int level = 1;
        /* The tokens of the subroutine separated by blanks */
        private String entry = "";

        /**
         * Creates a new subroutine element.
         * @param operator The operator which identifies the subroutine
         * @param startPoint The start location of the subroutine within the source data
         */
        public PSSubroutine(String operator, int startPoint) {
            super(operator, startPoint);
        }

        /**
         * Appends the token to the subroutine unless it terminates the element.
         * @param token The token to process
         * @param curPos The position of the byte which ended the token
         */
        @Override
        public void parseToken(String token, int curPos) {
            if (level == 0 && token.length() > 0) {
                boolean terminator = token.equals("def") || token.equals("ifelse")
                        || token.charAt(0) == '}';
                if (terminator) {
                    hasMore = false;
                    endPoint = curPos;
                    return;
                }
            }
            if (token.equals("{")) {
                level++;
            } else if (token.equals("}")) {
                level--;
            }
            entry = entry + token + " ";
        }

        /**
         * Gets the tokens of the parsed subroutine joined by blanks
         * @return The subroutine as a string without leading or trailing blanks
         */
        public String getSubroutine() {
            return entry.trim();
        }

        @Override
        public void parseByte(byte cur, int pos) {
            //Not currently used
        }
    }

    /**
     * An object representing a Postscript dictionary. The element ends with the
     * first "end" token.
     */
    public class PSDictionary extends PSElement {
        /* A list of dictionary entries which they themselves could be variables,
         * subroutines and arrays, This is currently left as parsed Strings as there is
         * no need to delve deeper for our current purposes. */
        private HashMap<String, String> entries;
        /* The text of the entry which is currently being collected */
        private String entry = "";
        private String token = "";
        /* The number of binary bytes still to be skipped */
        protected int binaryLength = 0;

        /**
         * Creates a new dictionary element.
         * @param operator The operator which identifies the dictionary
         * @param startPoint The start location of the dictionary within the source data
         */
        public PSDictionary(String operator, int startPoint) {
            super(operator, startPoint);
            entries = new HashMap<String, String>();
        }

        /**
         * Processes a token of the dictionary definition.
         * @param token The token to process
         * @param curPos The position of the byte which ended the token
         */
        @Override
        public void parseToken(String token, int curPos) {
            if (token.equals("end")) {
                addEntry(entry);
                hasMore = false;
                endPoint = curPos;
                return;
            }
            if (token.startsWith("/")) {
                //A name begins a new entry; store the previous one if it had a name
                if (entry.trim().startsWith("/")) {
                    tokens.clear();
                    addEntry(entry);
                }
                entry = "";
            }
            if (tokens.size() >= 1 || token.startsWith("/")) {
                tokens.add(token);
            }
            entry += token + " ";
            //Three tokens of the form "/name <integer> <token other than def>" switch
            //to binary mode: the integer is taken as the number of bytes to skip.
            //NOTE(review): empty tokens count as the third token here - confirm
            //that this is intended
            if (tokens.size() == 3 && tokens.get(0).startsWith("/") && !tokens.get(2).equals("def")
                    && isInteger(tokens.get(1))) {
                binaryLength = Integer.parseInt(tokens.get(1));
                readBinary = true;
            }
        }

        /**
         * Gets a map of dictionary entries identified by their name
         * @return Returns the dictionary entries as a map
         */
        public HashMap<String, String> getEntries() {
            return entries;
        }

        /**
         * Stores an entry under its first token. Entries which contain no token
         * at all (e.g. when "end" is found before anything else) are ignored.
         * @param entry The text of the entry
         */
        private void addEntry(String entry) {
            Scanner s = new Scanner(entry).useDelimiter(" ");
            if (!s.hasNext()) {
                //Nothing to store; calling next() would throw NoSuchElementException
                return;
            }
            String id = s.next();
            entries.put(id, entry);
        }

        /**
         * Skips binary data and records its location once it has been passed.
         * @param cur The byte currently being read
         * @param pos The position of the given byte
         */
        @Override
        public void parseByte(byte cur, int pos) {
            if (binaryLength > 0) {
                binaryLength--;
            } else {
                if (readBinary) {
                    //pos is the first byte after the binary data
                    int start = pos - Integer.parseInt(tokens.get(1));
                    int end = pos;
                    binaryEntries.put(tokens.get(0), new int[] {start, end});
                    readBinary = false;
                } else {
                    tokens.add(token);
                    parseToken(token, pos);
                }
            }
        }
    }

    /**
     * An object representing a Postscript variable, i.e. an operator together
     * with a single value.
     */
    public class PSVariable extends PSElement {

        /* The value of the parsed Postscript variable. */
        private String value = "";

        /**
         * Creates a new variable element.
         * @param operator The operator which identifies the variable
         * @param startPoint The start location of the variable within the source data
         */
        public PSVariable(String operator, int startPoint) {
            super(operator, startPoint);
        }

        /**
         * Ends the element when the "def" token is found; other tokens are ignored.
         * @param token The token to process
         * @param curPos The position of the byte which ended the token
         */
        @Override
        public void parseToken(String token, int curPos) {
            if (!token.equals("def")) {
                return;
            }
            hasMore = false;
            endPoint = curPos;
        }

        @Override
        public void parseByte(byte cur, int pos) {
            //Not currently used
        }

        /**
         * Sets the value of the Postscript variable
         * @param value The value to set
         */
        public void setValue(String value) {
            this.value = value;
        }

        /**
         * Gets the value of the Postscript variable
         * @return Returns the value as a String
         */
        public String getValue() {
            return value;
        }

        /**
         * Sets the end point location of the current Postscript variable.
         * @param endPoint The end point location as an integer
         */
        public void setEndPoint(int endPoint) {
            this.endPoint = endPoint;
        }

    }
}