You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PostscriptParser.java 23KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.fonts.type1;
  19. import java.io.IOException;
  20. import java.io.UnsupportedEncodingException;
  21. import java.util.ArrayList;
  22. import java.util.HashMap;
  23. import java.util.LinkedHashMap;
  24. import java.util.List;
  25. import java.util.Map.Entry;
  26. import java.util.Scanner;
  27. import org.apache.commons.logging.Log;
  28. import org.apache.commons.logging.LogFactory;
  29. public class PostscriptParser {
  30. protected static final Log LOG = LogFactory.getLog(PostscriptParser.class);
  31. /* Patterns used to identify Postscript elements */
  32. private static final String DICTIONARY = "dict";
  33. private static final String FIXED_ARRAY = "array";
  34. private static final String VARIABLE_ARRAY = "[";
  35. private static final String SUBROUTINE = "{";
  36. /* A list of parsed subroutines so if they are encountered during the parsing
  37. * phase of another element, they can be read and pattern matched. */
  38. private HashMap<String, PSSubroutine> subroutines = new HashMap<String, PSSubroutine>();
  39. /**
  40. * Parses the postscript document and returns a list of elements
  41. * @param segment The byte array containing the postscript data
  42. * @return A list of found Postscript elements
  43. * @throws IOException
  44. */
  45. public List<PSElement> parse(byte[] segment) throws IOException {
  46. List<PSElement> parsedElements = new ArrayList<PSElement>();
  47. /* Currently only scan and store the top level element. For deeper
  48. * Postscript parsing you can push and pop elements from a stack */
  49. PSElement foundElement = null;
  50. String operator = null;
  51. StringBuilder token = new StringBuilder();
  52. List<String> tokens = new ArrayList<String>();
  53. int startPoint = -1;
  54. boolean specialDelimiter = false;
  55. boolean lastWasSpecial = false;
  56. for (int i = 0; i < segment.length; i++) {
  57. byte cur = segment[i];
  58. if (foundElement != null && foundElement.hasMore()) {
  59. foundElement.parse(cur, i);
  60. continue;
  61. } else {
  62. char c = (char)cur;
  63. if (!lastWasSpecial) {
  64. specialDelimiter = (c == '{' || c == '}' || c == '[' || c == ']'
  65. || (!token.toString().equals("") && c == '/'));
  66. boolean isNotBreak = !(c == ' ' || c == '\r' || cur == 15 || cur == 12
  67. || cur == 10);
  68. if (isNotBreak && !specialDelimiter) {
  69. token.append(c);
  70. continue;
  71. }
  72. } else {
  73. lastWasSpecial = false;
  74. token.append(c);
  75. if (token.toString().equals("/")) {
  76. continue;
  77. }
  78. }
  79. }
  80. try {
  81. boolean setOp = false;
  82. if ((foundElement == null || !foundElement.hasMore()) && token.length() > 1
  83. && token.charAt(0) == '/' && tokens.size() != 1 || hasEndToken(token.toString())) {
  84. operator = token.toString();
  85. setOp = true;
  86. if (tokens.size() > 2 && tokens.get(tokens.size() - 1).equals("def")) {
  87. PSVariable newVar = new PSVariable(tokens.get(0), startPoint);
  88. newVar.setValue(tokens.get(1));
  89. newVar.setEndPoint(i - operator.length());
  90. parsedElements.add(newVar);
  91. }
  92. tokens.clear();
  93. startPoint = i - token.length();
  94. }
  95. if (operator != null) {
  96. if (foundElement instanceof PSSubroutine) {
  97. PSSubroutine sub = (PSSubroutine)foundElement;
  98. subroutines.put(sub.getOperator(), sub);
  99. parsedElements.add(sub);
  100. if (!setOp) {
  101. operator = "";
  102. }
  103. } else {
  104. if (foundElement != null) {
  105. if (!hasMatch(foundElement.getOperator(), parsedElements)) {
  106. parsedElements.add(foundElement);
  107. } else {
  108. LOG.warn("Duplicate " + foundElement.getOperator()
  109. + " in font file, Ignoring.");
  110. }
  111. }
  112. }
  113. //Compare token against patterns and create an element if matched
  114. foundElement = createElement(operator, token.toString(), startPoint);
  115. }
  116. } finally {
  117. tokens.add(token.toString());
  118. token = new StringBuilder();
  119. if (specialDelimiter) {
  120. specialDelimiter = false;
  121. lastWasSpecial = true;
  122. //Retrace special postscript character so it can be processed separately
  123. i--;
  124. }
  125. }
  126. }
  127. return parsedElements;
  128. }
  129. private boolean hasEndToken(String token) {
  130. return token.equals("currentdict");
  131. }
  132. private boolean hasMatch(String operator, List<PSElement> elements) {
  133. for (PSElement element : elements) {
  134. if (element.getOperator().equals(operator)) {
  135. return true;
  136. }
  137. }
  138. return false;
  139. }
  140. public PSElement createElement(String operator, String elementID, int startPoint) {
  141. if (operator.equals("")) {
  142. return null;
  143. }
  144. if (elementID.equals(FIXED_ARRAY)) {
  145. return new PSFixedArray(operator, startPoint);
  146. } else if (elementID.equals(VARIABLE_ARRAY)) {
  147. return new PSVariableArray(operator, startPoint);
  148. } else if (elementID.equals(SUBROUTINE)) {
  149. return new PSSubroutine(operator, startPoint);
  150. } else if (!operator.equals("/Private") && elementID.equals(DICTIONARY)) {
  151. return new PSDictionary(operator, startPoint);
  152. }
  153. return null;
  154. }
  /**
   * A base Postscript element class. It splits the bytes it is given into tokens and
   * leaves the interpretation of those tokens, and of any binary data, to its sub classes.
   */
  public abstract class PSElement {
    /* The identifying operator for this element */
    protected String operator;
    /* The bytes of the token currently being assembled */
    private List<Byte> token;
    /* Determines whether there is any more data to be read whilst parsing */
    protected boolean hasMore = true;
    /* The locations of any entries containing binary data (e.g. arrays) */
    protected LinkedHashMap<String, int[]> binaryEntries;
    /* The tokens parsed from the current element */
    protected List<String> tokens;
    /* Determines whether binary data is currently being read / parsed */
    protected boolean readBinary = false;
    /* The location of the element within the binary data */
    private int startPoint = -1;
    protected int endPoint = -1;
    /* A flag to determine if unexpected postscript has been found in the element */
    private boolean foundUnexpected = false;

    /**
     * Creates an element with no tokens and no binary entries.
     * @param operator The identifying operator of the element
     * @param startPoint The start location of the element within the source data
     */
    public PSElement(String operator, int startPoint) {
      this.operator = operator;
      this.startPoint = startPoint;
      token = new ArrayList<Byte>();
      binaryEntries = new LinkedHashMap<String, int[]>();
      tokens = new ArrayList<String>();
    }

    /**
     * Gets the Postscript element operator
     * @return The operator returned as a string
     */
    public String getOperator() {
      return operator;
    }

    /**
     * The start location of the element within the source binary data
     * @return The start location returned as an integer
     */
    public int getStartPoint() {
      return startPoint;
    }

    /**
     * The end location of the element within the source binary data
     * @return The end location returned as an integer
     */
    public int getEndPoint() {
      return endPoint;
    }

    /**
     * Takes over the task of tokenizing the byte data. Whilst binary data is being read
     * the byte goes straight to {@link #parseByte(byte, int)}; otherwise it is either added
     * to the pending token or, being a break or a bracket, causes the token to be parsed.
     * @param cur The current byte being read
     * @param pos The position of the current byte
     * @throws UnsupportedEncodingException if the token bytes cannot be decoded
     */
    public void parse(byte cur, int pos) throws UnsupportedEncodingException {
      if (readBinary) {
        parseByte(cur, pos);
        return;
      }
      char c = (char) cur;
      boolean bracket = c == '{' || c == '}' || c == '[' || c == ']'
          || c == '(' || c == ')';
      boolean validBreak = c == ' ' || cur == 15 || cur == 12 || c == '\r' || c == 10;
      if (bracket) {
        // Flush whatever preceded the bracket, then pass the bracket on as its own token
        parseToken(pos);
        token.add(cur);
        parseToken(pos);
      } else if (validBreak) {
        parseToken(pos);
      } else {
        token.add(cur);
      }
    }

    /* Decodes the pending token bytes as ASCII, passes the text on and resets the token */
    private void parseToken(int pos) throws UnsupportedEncodingException {
      byte[] raw = new byte[token.size()];
      int next = 0;
      for (Byte b : token) {
        raw[next++] = b.byteValue();
      }
      parseToken(new String(raw, "ASCII"), pos);
      token.clear();
    }

    /**
     * Passes responsibility for processing the byte stream to the PostScript object
     * @param cur The byte currently being read
     * @param pos The position of the given byte
     */
    public abstract void parseByte(byte cur, int pos);

    /**
     * Delegates the parse routine to a sub class
     * @param token The token which to parse
     * @param curPos The position reached in the source data when the token was completed
     */
    public abstract void parseToken(String token, int curPos);

    /**
     * Tests whether a string can be parsed as an integer.
     * @param intValue The text to test
     * @return true if {@link Integer#parseInt(String)} accepts the text
     */
    protected boolean isInteger(String intValue) {
      boolean parsable = true;
      try {
        Integer.parseInt(intValue);
      } catch (NumberFormatException ex) {
        parsable = false;
      }
      return parsable;
    }

    /**
     * Gets the locations of the binary entries found in this element
     * @return The binary entry locations in the order they were added
     */
    public LinkedHashMap<String, int[]> getBinaryEntries() {
      return binaryEntries;
    }

    /**
     * Gets the binary entry location of a given index from the array
     * @param index The index for which to retrieve the binary data location
     * @return The location stored at that position, or an empty array if there is none
     */
    public int[] getBinaryEntryByIndex(int index) {
      int position = 0;
      for (Entry<String, int[]> candidate : binaryEntries.entrySet()) {
        if (position++ == index) {
          return candidate.getValue();
        }
      }
      return new int[0];
    }

    /**
     * Determines if more data is still to be parsed for the Postscript element.
     * @return Returns true if more data exists
     */
    public boolean hasMore() {
      return hasMore;
    }

    /**
     * Sets a value to be true if an unexpected postscript entry is found in the element.
     * An example is where the encoding table may have a series of postscript operators
     * altering the state of the array. In this case the only option will be to
     * fully embed the font to avoid incorrect encoding in the resulting subset.
     * @param foundUnexpected true if unexpected postscript is found.
     */
    protected void setFoundUnexpected(boolean foundUnexpected) {
      this.foundUnexpected = foundUnexpected;
    }

    /**
     * Returns whether unexpected postscript has been found in the element
     * @return true if unexpected postscript is found
     */
    public boolean getFoundUnexpected() {
      return this.foundUnexpected;
    }
  }
  297. /**
  298. * An object representing a Postscript array with a fixed number of entries
  299. */
  300. public class PSFixedArray extends PSElement {
  301. private String entry = "";
  302. private String token = "";
  303. private boolean finished = false;
  304. protected int binaryLength = 0;
  305. /* A list containing each entry and it's contents in the array */
  306. private HashMap<Integer, String> entries;
  307. private static final String READ_ONLY = "readonly";
  308. public PSFixedArray(String operator, int startPoint) {
  309. super(operator, startPoint);
  310. entries = new HashMap<Integer, String>();
  311. }
  312. @Override
  313. public void parseToken(String token, int curPos) {
  314. if (!checkForEnd(token) || token.equals("def")) {
  315. hasMore = false;
  316. endPoint = curPos;
  317. return;
  318. }
  319. if (token.equals("dup")) {
  320. if (entry.startsWith("dup")) {
  321. addEntry(entry);
  322. }
  323. entry = "";
  324. tokens.clear();
  325. }
  326. if (!token.equals(READ_ONLY)) {
  327. entry += token + " ";
  328. }
  329. if (!token.trim().equals("")) {
  330. tokens.add(token);
  331. }
  332. if (tokens.size() == 4 && tokens.get(0).equals("dup") && isInteger(tokens.get(2))) {
  333. binaryLength = Integer.parseInt(tokens.get(2));
  334. readBinary = true;
  335. }
  336. }
  337. private boolean checkForEnd(String checkToken) {
  338. boolean subFound = false;
  339. //Check for a subroutine matching that of an array end definition
  340. PSSubroutine sub = subroutines.get("/" + checkToken);
  341. if (sub != null && sub.getSubroutine().contains("def")) {
  342. subFound = true;
  343. }
  344. if (!finished && (subFound || checkToken.equals("def"))) {
  345. finished = true;
  346. addEntry(entry);
  347. return false;
  348. } else {
  349. return !finished;
  350. }
  351. }
  352. /**
  353. * Gets a map of array entries identified by index
  354. * @return Returns the map of array entries
  355. */
  356. public HashMap<Integer, String> getEntries() {
  357. return entries;
  358. }
  359. private void addEntry(String entry) {
  360. if (!entry.equals("")) {
  361. if (entry.indexOf('/') != -1 && entry.charAt(entry.indexOf('/') - 1) != ' ') {
  362. entry = entry.replace("/", " /");
  363. }
  364. int entryLen;
  365. do {
  366. entryLen = entry.length();
  367. entry = entry.replace(" ", " ");
  368. } while (entry.length() != entryLen);
  369. Scanner s = new Scanner(entry).useDelimiter(" ");
  370. boolean valid = false;
  371. do {
  372. s.next();
  373. if (!s.hasNext()) {
  374. break;
  375. }
  376. int id = s.nextInt();
  377. entries.put(id, entry);
  378. valid = true;
  379. } while (false);
  380. if (!valid) {
  381. setFoundUnexpected(true);
  382. }
  383. }
  384. }
  385. @Override
  386. public void parseByte(byte cur, int pos) {
  387. if (binaryLength > 0) {
  388. token += (char)cur;
  389. binaryLength--;
  390. } else {
  391. if (readBinary) {
  392. int bLength = Integer.parseInt(tokens.get(2));
  393. int start = pos - bLength;
  394. int end = start + bLength;
  395. binaryEntries.put(tokens.get(1), new int[] {start, end});
  396. token = "";
  397. readBinary = false;
  398. } else {
  399. tokens.add(token);
  400. parseToken(token, pos);
  401. token = "";
  402. }
  403. }
  404. }
  405. }
  406. /**
  407. * An object representing a Postscript array with a variable number of entries
  408. */
  409. public class PSVariableArray extends PSElement {
  410. private int level = 0;
  411. private List<String> arrayItems;
  412. private String entry = "";
  413. public PSVariableArray(String operator, int startPoint) {
  414. super(operator, startPoint);
  415. arrayItems = new ArrayList<String>();
  416. }
  417. @Override
  418. public void parseToken(String token, int curPos) {
  419. entry += token + " ";
  420. if (level <= 0 && token.length() > 0 && token.charAt(0) == ']') {
  421. hasMore = false;
  422. endPoint = curPos;
  423. return;
  424. }
  425. /* If the array item is a subroutine, the following keeps track of the current level
  426. * of the tokens being parsed so that it can identify the finish */
  427. if (token.equals("{")) {
  428. level++;
  429. } else if (token.equals("}")) {
  430. level--;
  431. if (!entry.equals("") && level == 0) {
  432. arrayItems.add(entry);
  433. entry = "";
  434. }
  435. }
  436. }
  437. /**
  438. * Gets a list of found array entries within the variable array
  439. * @return Returns the found array elements as a list
  440. */
  441. public List<String> getEntries() {
  442. return arrayItems;
  443. }
  444. @Override
  445. public void parseByte(byte cur, int pos) {
  446. //Not currently used
  447. }
  448. }
  449. /**
  450. * An object representing a Postscript subroutine element
  451. */
  452. public class PSSubroutine extends PSElement {
  453. private int level = 1;
  454. private String entry = "";
  455. public PSSubroutine(String operator, int startPoint) {
  456. super(operator, startPoint);
  457. }
  458. @Override
  459. public void parseToken(String token, int curPos) {
  460. if (level == 0 && token.length() > 0 && (token.equals("def") || token.equals("ifelse")
  461. || token.charAt(0) == '}')) {
  462. hasMore = false;
  463. endPoint = curPos;
  464. return;
  465. }
  466. if (token.equals("{")) {
  467. level++;
  468. } else if (token.equals("}")) {
  469. level--;
  470. }
  471. entry += token + " ";
  472. }
  473. /**
  474. * Gets the parsed subroutine element as unmodified string
  475. * @return The subroutine as a string
  476. */
  477. public String getSubroutine() {
  478. return entry.trim();
  479. }
  480. @Override
  481. public void parseByte(byte cur, int pos) {
  482. //Not currently used
  483. }
  484. }
  485. /**
  486. * An object representing a Postscript dictionary
  487. */
  488. public class PSDictionary extends PSElement {
  489. /* A list of dictionary entries which they themselves could be variables,
  490. * subroutines and arrays, This is currently left as parsed Strings as there is
  491. * no need to delve deeper for our current purposes. */
  492. private HashMap<String, String> entries;
  493. private String entry = "";
  494. private String token = "";
  495. protected int binaryLength = 0;
  496. public PSDictionary(String operator, int startPoint) {
  497. super(operator, startPoint);
  498. entries = new HashMap<String, String>();
  499. }
  500. @Override
  501. public void parseToken(String token, int curPos) {
  502. if (token.equals("end")) {
  503. addEntry(entry);
  504. hasMore = false;
  505. endPoint = curPos;
  506. return;
  507. }
  508. if (token.startsWith("/")) {
  509. if (entry.trim().startsWith("/")) {
  510. tokens.clear();
  511. addEntry(entry);
  512. }
  513. entry = "";
  514. }
  515. if (tokens.size() >= 1 || token.startsWith("/")) {
  516. tokens.add(token);
  517. }
  518. entry += token + " ";
  519. if (tokens.size() == 3 && tokens.get(0).startsWith("/") && !tokens.get(2).equals("def")
  520. && isInteger(tokens.get(1))) {
  521. binaryLength = Integer.parseInt(tokens.get(1));
  522. readBinary = true;
  523. }
  524. }
  525. /**
  526. * Gets a map of dictionary entries identified by their name
  527. * @return Returns the dictionary entries as a map
  528. */
  529. public HashMap<String, String> getEntries() {
  530. return entries;
  531. }
  532. private void addEntry(String entry) {
  533. Scanner s = new Scanner(entry).useDelimiter(" ");
  534. String id = s.next();
  535. entries.put(id, entry);
  536. }
  537. @Override
  538. public void parseByte(byte cur, int pos) {
  539. if (binaryLength > 0) {
  540. binaryLength--;
  541. } else {
  542. if (readBinary) {
  543. int start = pos - Integer.parseInt(tokens.get(1));
  544. int end = pos;
  545. binaryEntries.put(tokens.get(0), new int[] {start, end});
  546. readBinary = false;
  547. } else {
  548. tokens.add(token);
  549. parseToken(token, pos);
  550. }
  551. }
  552. }
  553. }
  554. /**
  555. * An object representing a Postscript variable
  556. */
  557. public class PSVariable extends PSElement {
  558. /* The value of the parsed Postscript variable. */
  559. private String value = "";
  560. public PSVariable(String operator, int startPoint) {
  561. super(operator, startPoint);
  562. }
  563. @Override
  564. public void parseToken(String token, int curPos) {
  565. if (token.equals("def")) {
  566. hasMore = false;
  567. endPoint = curPos;
  568. return;
  569. }
  570. }
  571. @Override
  572. public void parseByte(byte cur, int pos) {
  573. //Not currently used
  574. }
  575. /**
  576. * Sets the value of the Postscript variable value
  577. * @param value The value to set
  578. */
  579. public void setValue(String value) {
  580. this.value = value;
  581. }
  582. /**
  583. * Gets the value of the Postscript variable
  584. * @return Returns the value as a String
  585. */
  586. public String getValue() {
  587. return value;
  588. }
  589. /**
  590. * Sets the end point location of the current Postscript variable.
  591. * @param endPoint The end point location as an integer
  592. */
  593. public void setEndPoint(int endPoint) {
  594. this.endPoint = endPoint;
  595. }
  596. }
  597. }