aboutsummaryrefslogtreecommitdiffstats
path: root/build/src/main/java/org/aspectj/internal/tools/ant/taskdefs/StripNonBodyHtml.java
blob: b5b41a93bb47bb8709a7d0259c0b5332d2f4e263 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
/* *******************************************************************
 * Copyright (c) 1999-2001 Xerox Corporation,
 *               2002 Palo Alto Research Center, Incorporated (PARC).
 * All rights reserved.
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Public License v 2.0
 * which accompanies this distribution and is available at
 * https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.txt
 *
 * Contributors:
 *     Xerox/PARC     initial implementation
 * ******************************************************************/

package org.aspectj.internal.tools.ant.taskdefs;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.DirectoryScanner;
import org.apache.tools.ant.taskdefs.MatchingTask;

/**
 * Task to convert html source files into files with only body content.
 *
 * <p> This task can take the following arguments:</p>
 *
 * <ul>
 *   <li>srcdir</li>
 *   <li>destdir</li>
 *   <li>include</li>
 *   <li>exclude</li>
 * </ul>
 *
 * <p>Of these arguments, only <b>sourcedir</b> is required.</p>
 *
 * <p> When this task executes, it will scan the srcdir based on the
 * include and exclude properties.</p>
 */

public class StripNonBodyHtml extends MatchingTask {

    private File srcDir;
    private File destDir = null;

    public void setSrcdir(File srcDir) {
        this.srcDir = srcDir;
    }

    public void setDestdir(File destDir) {
        this.destDir = destDir;
    }

    public void execute() throws BuildException {
        if (srcDir == null) {
            throw new BuildException("srcdir attribute must be set!");
        }
        if (!srcDir.exists()) {
            throw new BuildException("srcdir does not exist!");
        }
        if (!srcDir.isDirectory()) {
            throw new BuildException("srcdir is not a directory!");
        }
        if (destDir != null) {
            if (!destDir.exists()) {
                throw new BuildException("destdir does not exist!");
            }
            if (!destDir.isDirectory()) {
                throw new BuildException("destdir is not a directory!");
            }
        }

        DirectoryScanner ds = super.getDirectoryScanner(srcDir);
        String[] files = ds.getIncludedFiles();

        log("stripping " + files.length + " files");
        int stripped = 0;
		for (String file : files) {
			if (processFile(file)) {
				stripped++;
			} else {
				log(file + " not stripped");
			}
		}
        log(stripped + " files successfully stripped");
    }

    boolean processFile(String filename) throws BuildException {
        File srcFile = new File(srcDir, filename);
        File destFile;
        if (destDir == null) {
            destFile = srcFile;
        } else {
            destFile = new File(destDir, filename);
            destFile.getParentFile().mkdirs();
        }
        try {
            return strip(srcFile, destFile);
        } catch (IOException e) {
            throw new BuildException(e);
        }
    }

    private boolean strip(File f, File g) throws IOException {
        BufferedInputStream in =
            new BufferedInputStream(new FileInputStream(f));
        String s = readToString(in);
        in.close();
        return writeBodyTo(s, g);
    }

    private ByteArrayOutputStream temp = new ByteArrayOutputStream();
    private byte[] buf = new byte[2048];

    private String readToString(InputStream in) throws IOException {
        ByteArrayOutputStream temp = this.temp;
        byte[] buf = this.buf;
        String s = "";
        try {
            while (true) {
                int i = in.read(buf, 0, 2048);
                if (i == -1) break;
                temp.write(buf, 0, i);

            }
            s = temp.toString();
        } finally {
            temp.reset();
        }
        return s;
    }

    private boolean writeBodyTo(String s, File f) throws IOException {
        int start;//, end;
        try {
            start = findStart(s);
            findEnd(s, start);
        } catch (ParseException e) {
            return false; // if we get confused, just don't write the file.
        }
        s = processBody(s,f);
        BufferedOutputStream out =
            new BufferedOutputStream(new FileOutputStream(f));

        out.write(s.getBytes());
        out.close();
        return true;
    }

    /**
     * Process body. This implemenation strips text
     *   between  &lt!-- start strip --&gt
     *   and      &lt!-- end strip --&gt
     *   inclusive.
     */
    private String processBody(String body, File file) {
        if (null == body) return  body;
        final String START = "<!-- start strip -->";
        final String END = "<!-- end strip -->";
        return stripTags(body, file.toString(), START, END);
    }

   /**
     * Strip 0..n substrings in input: "s/${START}.*${END}//g"
     * @param input the String to strip
     * @param source the name of the source for logging purposes
     * @param start the starting tag (case sensitive)
     * @param end the ending tag (case sensitive)
     */
    String stripTags(String input, final String SOURCE,
                     final String START, final String END) {
        if (null == input) return  input;
        StringBuilder buffer = new StringBuilder(input.length());
        String result = input;
        int curLoc = 0;
        while (true) {
            int startLoc = input.indexOf(START, curLoc);
            if (-1 == startLoc) {
                buffer.append(input.substring(curLoc));
                result = buffer.toString();
                break; // <------------ valid exit
            } else {
                int endLoc = input.indexOf(END, startLoc);
                if (-1 == endLoc) {
                    log(SOURCE + " stripTags - no end tag - startLoc=" + startLoc);
                    break; // <------------ invalid exit
                } else if (endLoc < startLoc) {
                    log(SOURCE + " stripTags - impossible: startLoc="
                        + startLoc + " > endLoc=" + endLoc);
                    break; // <------------ invalid exit
                } else {
                    buffer.append(input.substring(curLoc, startLoc));
                    curLoc = endLoc + END.length();
                }
            }
        }
        return result;
    }

    private int findStart(String s) throws ParseException {
        int len = s.length();
        int start = 0;
        while (true) {
            start = s.indexOf("<body", start);
            if (start == -1) {
                start = s.indexOf("<BODY", start);
                if (start == -1) throw barf();
            }
            start = start + 5;
            if (start >= len) throw barf();
            char ch = s.charAt(start);
            if (ch == '>') return start + 1;
            if (Character.isWhitespace(ch)) {
                start = s.indexOf('>', start);
                if (start == -1) return -1;
                return start + 1;
            }
        }
    }

    private int findEnd(String s, int start) throws ParseException {
        int end;
        end = s.indexOf("</body>", start);
        if (end == -1) {
            end = s.indexOf("</BODY>", start);
            if (end == -1) throw barf();
        }
        return end;
    }

    private static class ParseException extends Exception {
        private static final long serialVersionUID = -1l;
    }

    private static ParseException barf() {
        return new ParseException();
    }
}