You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

StripNonBodyHtml.java 7.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. /* *******************************************************************
  2. * Copyright (c) 1999-2001 Xerox Corporation,
  3. * 2002 Palo Alto Research Center, Incorporated (PARC).
  4. * All rights reserved.
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Public License v 2.0
  7. * which accompanies this distribution and is available at
  8. * https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.txt
  9. *
  10. * Contributors:
  11. * Xerox/PARC initial implementation
  12. * ******************************************************************/
  13. package org.aspectj.internal.tools.ant.taskdefs;
  14. import java.io.BufferedInputStream;
  15. import java.io.BufferedOutputStream;
  16. import java.io.ByteArrayOutputStream;
  17. import java.io.File;
  18. import java.io.FileInputStream;
  19. import java.io.FileOutputStream;
  20. import java.io.IOException;
  21. import java.io.InputStream;
  22. import org.apache.tools.ant.BuildException;
  23. import org.apache.tools.ant.DirectoryScanner;
  24. import org.apache.tools.ant.taskdefs.MatchingTask;
  25. /**
  26. * Task to convert html source files into files with only body content.
  27. *
  28. * <p> This task can take the following arguments:</p>
  29. *
  30. * <ul>
  31. * <li>srcdir</li>
  32. * <li>destdir</li>
  33. * <li>include</li>
  34. * <li>exclude</li>
  35. * </ul>
  36. *
  37. * <p>Of these arguments, only <b>sourcedir</b> is required.</p>
  38. *
  39. * <p> When this task executes, it will scan the srcdir based on the
  40. * include and exclude properties.</p>
  41. */
  42. public class StripNonBodyHtml extends MatchingTask {
  43. private File srcDir;
  44. private File destDir = null;
  45. public void setSrcdir(File srcDir) {
  46. this.srcDir = srcDir;
  47. }
  48. public void setDestdir(File destDir) {
  49. this.destDir = destDir;
  50. }
  51. public void execute() throws BuildException {
  52. if (srcDir == null) {
  53. throw new BuildException("srcdir attribute must be set!");
  54. }
  55. if (!srcDir.exists()) {
  56. throw new BuildException("srcdir does not exist!");
  57. }
  58. if (!srcDir.isDirectory()) {
  59. throw new BuildException("srcdir is not a directory!");
  60. }
  61. if (destDir != null) {
  62. if (!destDir.exists()) {
  63. throw new BuildException("destdir does not exist!");
  64. }
  65. if (!destDir.isDirectory()) {
  66. throw new BuildException("destdir is not a directory!");
  67. }
  68. }
  69. DirectoryScanner ds = super.getDirectoryScanner(srcDir);
  70. String[] files = ds.getIncludedFiles();
  71. log("stripping " + files.length + " files");
  72. int stripped = 0;
  73. for (String file : files) {
  74. if (processFile(file)) {
  75. stripped++;
  76. } else {
  77. log(file + " not stripped");
  78. }
  79. }
  80. log(stripped + " files successfully stripped");
  81. }
  82. boolean processFile(String filename) throws BuildException {
  83. File srcFile = new File(srcDir, filename);
  84. File destFile;
  85. if (destDir == null) {
  86. destFile = srcFile;
  87. } else {
  88. destFile = new File(destDir, filename);
  89. destFile.getParentFile().mkdirs();
  90. }
  91. try {
  92. return strip(srcFile, destFile);
  93. } catch (IOException e) {
  94. throw new BuildException(e);
  95. }
  96. }
  97. private boolean strip(File f, File g) throws IOException {
  98. BufferedInputStream in =
  99. new BufferedInputStream(new FileInputStream(f));
  100. String s = readToString(in);
  101. in.close();
  102. return writeBodyTo(s, g);
  103. }
  104. private ByteArrayOutputStream temp = new ByteArrayOutputStream();
  105. private byte[] buf = new byte[2048];
  106. private String readToString(InputStream in) throws IOException {
  107. ByteArrayOutputStream temp = this.temp;
  108. byte[] buf = this.buf;
  109. String s = "";
  110. try {
  111. while (true) {
  112. int i = in.read(buf, 0, 2048);
  113. if (i == -1) break;
  114. temp.write(buf, 0, i);
  115. }
  116. s = temp.toString();
  117. } finally {
  118. temp.reset();
  119. }
  120. return s;
  121. }
  122. private boolean writeBodyTo(String s, File f) throws IOException {
  123. int start;//, end;
  124. try {
  125. start = findStart(s);
  126. findEnd(s, start);
  127. } catch (ParseException e) {
  128. return false; // if we get confused, just don't write the file.
  129. }
  130. s = processBody(s,f);
  131. BufferedOutputStream out =
  132. new BufferedOutputStream(new FileOutputStream(f));
  133. out.write(s.getBytes());
  134. out.close();
  135. return true;
  136. }
  137. /**
  138. * Process body. This implemenation strips text
  139. * between &lt!-- start strip --&gt
  140. * and &lt!-- end strip --&gt
  141. * inclusive.
  142. */
  143. private String processBody(String body, File file) {
  144. if (null == body) return body;
  145. final String START = "<!-- start strip -->";
  146. final String END = "<!-- end strip -->";
  147. return stripTags(body, file.toString(), START, END);
  148. }
  149. /**
  150. * Strip 0..n substrings in input: "s/${START}.*${END}//g"
  151. * @param input the String to strip
  152. * @param source the name of the source for logging purposes
  153. * @param start the starting tag (case sensitive)
  154. * @param end the ending tag (case sensitive)
  155. */
  156. String stripTags(String input, final String SOURCE,
  157. final String START, final String END) {
  158. if (null == input) return input;
  159. StringBuilder buffer = new StringBuilder(input.length());
  160. String result = input;
  161. int curLoc = 0;
  162. while (true) {
  163. int startLoc = input.indexOf(START, curLoc);
  164. if (-1 == startLoc) {
  165. buffer.append(input.substring(curLoc));
  166. result = buffer.toString();
  167. break; // <------------ valid exit
  168. } else {
  169. int endLoc = input.indexOf(END, startLoc);
  170. if (-1 == endLoc) {
  171. log(SOURCE + " stripTags - no end tag - startLoc=" + startLoc);
  172. break; // <------------ invalid exit
  173. } else if (endLoc < startLoc) {
  174. log(SOURCE + " stripTags - impossible: startLoc="
  175. + startLoc + " > endLoc=" + endLoc);
  176. break; // <------------ invalid exit
  177. } else {
  178. buffer.append(input.substring(curLoc, startLoc));
  179. curLoc = endLoc + END.length();
  180. }
  181. }
  182. }
  183. return result;
  184. }
  185. private int findStart(String s) throws ParseException {
  186. int len = s.length();
  187. int start = 0;
  188. while (true) {
  189. start = s.indexOf("<body", start);
  190. if (start == -1) {
  191. start = s.indexOf("<BODY", start);
  192. if (start == -1) throw barf();
  193. }
  194. start = start + 5;
  195. if (start >= len) throw barf();
  196. char ch = s.charAt(start);
  197. if (ch == '>') return start + 1;
  198. if (Character.isWhitespace(ch)) {
  199. start = s.indexOf('>', start);
  200. if (start == -1) return -1;
  201. return start + 1;
  202. }
  203. }
  204. }
  205. private int findEnd(String s, int start) throws ParseException {
  206. int end;
  207. end = s.indexOf("</body>", start);
  208. if (end == -1) {
  209. end = s.indexOf("</BODY>", start);
  210. if (end == -1) throw barf();
  211. }
  212. return end;
  213. }
  214. private static class ParseException extends Exception {
  215. private static final long serialVersionUID = -1l;
  216. }
  217. private static ParseException barf() {
  218. return new ParseException();
  219. }
  220. }