You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

StripNonBodyHtml.java 7.6KB

21 years ago
21 years ago
21 years ago
21 years ago
21 years ago
21 years ago
21 years ago
21 years ago
21 years ago
21 years ago
19 years ago
19 years ago
21 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. /* *******************************************************************
  2. * Copyright (c) 1999-2001 Xerox Corporation,
  3. * 2002 Palo Alto Research Center, Incorporated (PARC).
  4. * All rights reserved.
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Public License v 2.0
  7. * which accompanies this distribution and is available at
  8. * https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.txt
  9. *
  10. * Contributors:
  11. * Xerox/PARC initial implementation
  12. * ******************************************************************/
  13. package org.aspectj.internal.tools.ant.taskdefs;
  14. import java.io.BufferedInputStream;
  15. import java.io.BufferedOutputStream;
  16. import java.io.ByteArrayOutputStream;
  17. import java.io.File;
  18. import java.io.FileInputStream;
  19. import java.io.FileOutputStream;
  20. import java.io.IOException;
  21. import java.io.InputStream;
  22. import org.apache.tools.ant.BuildException;
  23. import org.apache.tools.ant.DirectoryScanner;
  24. import org.apache.tools.ant.taskdefs.MatchingTask;
  25. /**
  26. * Task to convert html source files into files with only body content.
  27. *
  28. * <p> This task can take the following arguments:</p>
  29. *
  30. * <ul>
  31. * <li>srcdir</li>
  32. * <li>destdir</li>
  33. * <li>include</li>
  34. * <li>exclude</li>
  35. * </ul>
  36. *
  37. * <p>Of these arguments, only <b>sourcedir</b> is required.</p>
  38. *
  39. * <p> When this task executes, it will scan the srcdir based on the
  40. * include and exclude properties.</p>
  41. */
  42. public class StripNonBodyHtml extends MatchingTask {
  43. private File srcDir;
  44. private File destDir = null;
  45. public void setSrcdir(File srcDir) {
  46. this.srcDir = srcDir;
  47. }
  48. public void setDestdir(File destDir) {
  49. this.destDir = destDir;
  50. }
  51. public void execute() throws BuildException {
  52. if (srcDir == null) {
  53. throw new BuildException("srcdir attribute must be set!");
  54. }
  55. if (!srcDir.exists()) {
  56. throw new BuildException("srcdir does not exist!");
  57. }
  58. if (!srcDir.isDirectory()) {
  59. throw new BuildException("srcdir is not a directory!");
  60. }
  61. if (destDir != null) {
  62. if (!destDir.exists()) {
  63. throw new BuildException("destdir does not exist!");
  64. }
  65. if (!destDir.isDirectory()) {
  66. throw new BuildException("destdir is not a directory!");
  67. }
  68. }
  69. DirectoryScanner ds = super.getDirectoryScanner(srcDir);
  70. String[] files = ds.getIncludedFiles();
  71. log("stripping " + files.length + " files");
  72. int stripped = 0;
  73. for (String file : files) {
  74. if (processFile(file)) {
  75. stripped++;
  76. } else {
  77. log(file + " not stripped");
  78. }
  79. }
  80. log(stripped + " files successfully stripped");
  81. }
  82. boolean processFile(String filename) throws BuildException {
  83. File srcFile = new File(srcDir, filename);
  84. File destFile;
  85. if (destDir == null) {
  86. destFile = srcFile;
  87. } else {
  88. destFile = new File(destDir, filename);
  89. destFile.getParentFile().mkdirs();
  90. }
  91. try {
  92. return strip(srcFile, destFile);
  93. } catch (IOException e) {
  94. throw new BuildException(e);
  95. }
  96. }
  97. private boolean strip(File f, File g) throws IOException {
  98. BufferedInputStream in =
  99. new BufferedInputStream(new FileInputStream(f));
  100. String s = readToString(in);
  101. in.close();
  102. return writeBodyTo(s, g);
  103. }
  104. private ByteArrayOutputStream temp = new ByteArrayOutputStream();
  105. private byte[] buf = new byte[2048];
  106. private String readToString(InputStream in) throws IOException {
  107. ByteArrayOutputStream temp = this.temp;
  108. byte[] buf = this.buf;
  109. String s = "";
  110. try {
  111. while (true) {
  112. int i = in.read(buf, 0, 2048);
  113. if (i == -1) break;
  114. temp.write(buf, 0, i);
  115. }
  116. s = temp.toString();
  117. } finally {
  118. temp.reset();
  119. }
  120. return s;
  121. }
  122. private boolean writeBodyTo(String s, File f) throws IOException {
  123. int start;//, end;
  124. try {
  125. start = findStart(s);
  126. findEnd(s, start);
  127. } catch (ParseException e) {
  128. return false; // if we get confused, just don't write the file.
  129. }
  130. s = processBody(s,f);
  131. BufferedOutputStream out =
  132. new BufferedOutputStream(new FileOutputStream(f));
  133. out.write(s.getBytes());
  134. out.close();
  135. return true;
  136. }
  137. /**
  138. * Process body. This implemenation strips text
  139. * between &lt!-- start strip --&gt
  140. * and &lt!-- end strip --&gt
  141. * inclusive.
  142. */
  143. private String processBody(String body, File file) {
  144. if (null == body) return body;
  145. final String START = "<!-- start strip -->";
  146. final String END = "<!-- end strip -->";
  147. return stripTags(body, file.toString(), START, END);
  148. }
  149. /**
  150. * Strip 0..n substrings in input: "s/${START}.*${END}//g"
  151. * @param input the String to strip
  152. * @param source the name of the source for logging purposes
  153. * @param start the starting tag (case sensitive)
  154. * @param end the ending tag (case sensitive)
  155. */
  156. String stripTags(String input, final String SOURCE,
  157. final String START, final String END) {
  158. if (null == input) return input;
  159. StringBuilder buffer = new StringBuilder(input.length());
  160. String result = input;
  161. int curLoc = 0;
  162. while (true) {
  163. int startLoc = input.indexOf(START, curLoc);
  164. if (-1 == startLoc) {
  165. buffer.append(input.substring(curLoc));
  166. result = buffer.toString();
  167. break; // <------------ valid exit
  168. } else {
  169. int endLoc = input.indexOf(END, startLoc);
  170. if (-1 == endLoc) {
  171. log(SOURCE + " stripTags - no end tag - startLoc=" + startLoc);
  172. break; // <------------ invalid exit
  173. } else if (endLoc < startLoc) {
  174. log(SOURCE + " stripTags - impossible: startLoc="
  175. + startLoc + " > endLoc=" + endLoc);
  176. break; // <------------ invalid exit
  177. } else {
  178. buffer.append(input.substring(curLoc, startLoc));
  179. curLoc = endLoc + END.length();
  180. }
  181. }
  182. }
  183. return result;
  184. }
  185. private int findStart(String s) throws ParseException {
  186. int len = s.length();
  187. int start = 0;
  188. while (true) {
  189. start = s.indexOf("<body", start);
  190. if (start == -1) {
  191. start = s.indexOf("<BODY", start);
  192. if (start == -1) throw barf();
  193. }
  194. start = start + 5;
  195. if (start >= len) throw barf();
  196. char ch = s.charAt(start);
  197. if (ch == '>') return start + 1;
  198. if (Character.isWhitespace(ch)) {
  199. start = s.indexOf('>', start);
  200. if (start == -1) return -1;
  201. return start + 1;
  202. }
  203. }
  204. }
  205. private int findEnd(String s, int start) throws ParseException {
  206. int end;
  207. end = s.indexOf("</body>", start);
  208. if (end == -1) {
  209. end = s.indexOf("</BODY>", start);
  210. if (end == -1) throw barf();
  211. }
  212. return end;
  213. }
  214. private static class ParseException extends Exception {
  215. private static final long serialVersionUID = -1l;
  216. }
  217. private static ParseException barf() {
  218. return new ParseException();
  219. }
  220. }