git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1738418 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_15_BETA2
@@ -0,0 +1,188 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.poifs.macros; | |||
import java.io.ByteArrayInputStream; | |||
import java.io.ByteArrayOutputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import java.nio.charset.Charset; | |||
import java.util.HashMap; | |||
import java.util.Map; | |||
import java.util.zip.ZipEntry; | |||
import java.util.zip.ZipInputStream; | |||
import org.apache.poi.poifs.eventfilesystem.POIFSReader; | |||
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent; | |||
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener; | |||
import org.apache.poi.poifs.filesystem.DocumentInputStream; | |||
import org.apache.poi.util.IOUtils; | |||
import org.apache.poi.util.RLEDecompressingInputStream; | |||
/** | |||
* This class is able to extract the source of all VBA Modules of an Excel file. | |||
*/ | |||
public class VBAMacroExtractor { | |||
/** | |||
* Extract macros from XLSM or XLS file. Automatically detects ZIP (XLSM, DOCX, etc) files. | |||
* | |||
* @param in | |||
* @return | |||
* @throws IOException | |||
*/ | |||
public Map<String, String> extractMacros(InputStream in) throws IOException { | |||
PushbackInputStream bpin = new PushbackInputStream(in, 2); | |||
byte[] header = new byte[2]; | |||
if (bpin.read(header) != 2) { | |||
throw new IllegalArgumentException("Invalid InputStream: cannot read 2 bytes"); | |||
} | |||
bpin.unread(header); | |||
if (header[0] == 'P' && header[1] == 'K') { | |||
ZipInputStream zis = new ZipInputStream(bpin); | |||
ZipEntry zipEntry; | |||
while ((zipEntry = zis.getNextEntry()) != null) { | |||
if ("xl/vbaProject.bin".equals(zipEntry.getName())) { | |||
try { | |||
return extractMacrosFromPOIFSInputStream(zis); | |||
} finally { | |||
zis.closeEntry(); | |||
} | |||
} | |||
} | |||
return null; | |||
} else { | |||
return extractMacrosFromPOIFSInputStream(bpin); | |||
} | |||
} | |||
/** | |||
* Extracts all macros from all modules of the provided input stream. The stream is assumed to be in POIFS format (i.e. XLS file itself or | |||
* vbaProject.bin from OOXML files) | |||
* | |||
* @param in | |||
* @return | |||
* @throws IOException | |||
*/ | |||
public Map<String, String> extractMacrosFromPOIFSInputStream(InputStream in) throws IOException { | |||
class Module { | |||
Integer offset; | |||
byte[] buf; | |||
} | |||
class ModuleMap extends HashMap<String, Module> { | |||
Charset charset = Charset.forName("Cp1252"); // default charset | |||
} | |||
try { | |||
final ModuleMap modules = new ModuleMap(); | |||
POIFSReader dirReader = new POIFSReader(); | |||
dirReader.registerListener(new POIFSReaderListener() { | |||
public void processPOIFSReaderEvent(POIFSReaderEvent event) { | |||
try { | |||
String name = event.getName(); | |||
if (event.getPath().toString().endsWith("\\VBA")) { | |||
if ("dir".equals(name)) { | |||
// process DIR | |||
RLEDecompressingInputStream in = new RLEDecompressingInputStream(event.getStream()); | |||
String streamName = null; | |||
while (true) { | |||
int id = in.readShort(); | |||
if (id == -1 || id == 0x0010) { | |||
break; // EOF or TERMINATOR | |||
} | |||
int len = in.readInt(); | |||
switch (id) { | |||
case 0x0009: // PROJECTVERSION | |||
in.skip(6); | |||
break; | |||
case 0x0003: // PROJECTCODEPAGE | |||
int codepage = in.readShort(); | |||
modules.charset = Charset.forName("Cp" + codepage); | |||
break; | |||
case 0x001A: // STREAMNAME | |||
byte[] streamNameBuf = new byte[len]; | |||
int count = in.read(streamNameBuf); | |||
streamName = new String(streamNameBuf, 0, count, modules.charset); | |||
break; | |||
case 0x0031: // MODULEOFFSET | |||
int moduleOffset = in.readInt(); | |||
Module module = modules.get(streamName); | |||
if (module != null) { | |||
ByteArrayOutputStream out = new ByteArrayOutputStream(); | |||
RLEDecompressingInputStream stream = new RLEDecompressingInputStream(new ByteArrayInputStream( | |||
module.buf, moduleOffset, module.buf.length - moduleOffset)); | |||
IOUtils.copy(stream, out); | |||
stream.close(); | |||
out.close(); | |||
module.buf = out.toByteArray(); | |||
} else { | |||
module = new Module(); | |||
module.offset = moduleOffset; | |||
modules.put(streamName, module); | |||
} | |||
break; | |||
default: | |||
in.skip(len); | |||
break; | |||
} | |||
} | |||
} else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) { | |||
// process module, skip __SRP and _VBA_PROJECT since these do not contain macros | |||
Module module = modules.get(name); | |||
final DocumentInputStream stream = event.getStream(); | |||
final InputStream in; | |||
if (module == null) { | |||
// no DIR stream with offsets yet, so store the compressed bytes for later | |||
module = new Module(); | |||
modules.put(name, module); | |||
in = stream; | |||
} else { | |||
// we know the offset already, so decompress immediately on-the-fly | |||
stream.skip(module.offset); | |||
in = new RLEDecompressingInputStream(stream); | |||
} | |||
final ByteArrayOutputStream out = new ByteArrayOutputStream(); | |||
IOUtils.copy(in, out); | |||
in.close(); | |||
out.close(); | |||
module.buf = out.toByteArray(); | |||
} | |||
} | |||
} catch (IOException e) { | |||
throw new RuntimeException(e); | |||
} | |||
} | |||
}); | |||
dirReader.read(in); | |||
Map<String, String> moduleSources = new HashMap<String, String>(); | |||
for (Map.Entry<String, Module> entry : modules.entrySet()) { | |||
Module module = entry.getValue(); | |||
if (module.buf != null && module.buf.length > 0) { // Skip empty modules | |||
moduleSources.put(entry.getKey(), new String(module.buf, modules.charset)); | |||
} | |||
} | |||
return moduleSources; | |||
} catch (IOException e) { | |||
e.printStackTrace(); | |||
throw e; | |||
} | |||
} | |||
} |
@@ -0,0 +1,273 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.util; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
/**
 * Wrapper of InputStream which provides Run Length Encoding (RLE)
 * decompression on the fly. Uses MS-OVBA decompression algorithm. See
 * http://download.microsoft.com/download/2/4/8/24862317-78F0-4C4B-B355-C7B2C1D997DB/[MS-OVBA].pdf
 * <p>
 * The wrapped stream must start with the signature byte 0x01 followed by a
 * sequence of chunks. Each chunk is decoded into an internal 4096 byte buffer
 * from which the read methods are served.
 */
public class RLEDecompressingInputStream extends InputStream {

    /**
     * Bitmasks for performance
     */
    private static final int[] POWER2 = new int[] { 0x0001, // 0
            0x0002, // 1
            0x0004, // 2
            0x0008, // 3
            0x0010, // 4
            0x0020, // 5
            0x0040, // 6
            0x0080, // 7
            0x0100, // 8
            0x0200, // 9
            0x0400, // 10
            0x0800, // 11
            0x1000, // 12
            0x2000, // 13
            0x4000, // 14
            0x8000 // 15
    };

    /** the wrapped inputstream */
    private final InputStream in;

    /** a byte buffer with size 4096 for storing a single chunk */
    private final byte[] buf;

    /** the current position in the byte buffer for reading */
    private int pos;

    /** the number of bytes in the byte buffer, or -1 once the wrapped stream is exhausted */
    private int len;

    /**
     * Creates a new wrapper RLE Decompression InputStream and decodes the first chunk.
     *
     * @param in the stream holding the compressed container
     * @throws IOException if reading from the wrapped stream fails
     * @throws IllegalArgumentException if the stream does not start with the signature byte 0x01
     */
    public RLEDecompressingInputStream(InputStream in) throws IOException {
        this.in = in;
        buf = new byte[4096];
        pos = 0;
        int header = in.read();
        if (header != 0x01) {
            throw new IllegalArgumentException(String.format("Header byte 0x01 expected, received 0x%02X", header & 0xFF));
        }
        len = readChunk();
    }

    /**
     * Reads the next decompressed byte.
     *
     * @return the byte as a value in the range 0..255, or -1 at the end of the stream
     * @throws IOException if reading from the wrapped stream fails
     */
    @Override
    public int read() throws IOException {
        if (len == -1) {
            return -1;
        }
        // loop (instead of a single refill) so that a chunk which decodes to
        // zero bytes does not hand out stale buffer content
        while (pos >= len) {
            if ((len = readChunk()) == -1) {
                return -1;
            }
        }
        // mask to honour the InputStream contract: a data byte of 0xFF must not look like EOF
        return buf[pos++] & 0xFF;
    }

    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Reads decompressed bytes until <code>l</code> bytes were delivered or the stream ends.
     *
     * @return the number of bytes stored in <code>b</code>, or -1 if the stream had already ended
     */
    @Override
    public int read(byte[] b, int off, int l) throws IOException {
        if (len == -1) {
            return -1;
        }
        int offset = off;
        int length = l;
        while (length > 0) {
            if (pos >= len) {
                if ((len = readChunk()) == -1) {
                    return offset > off ? offset - off : -1;
                }
            }
            int c = Math.min(length, len - pos);
            System.arraycopy(buf, pos, b, offset, c);
            pos += c;
            length -= c;
            offset += c;
        }
        return l;
    }

    /**
     * Skips <code>n</code> decompressed bytes.
     *
     * @return <code>n</code> when all bytes were skipped; -1 if the end of the stream was hit
     *         first (note that this differs from the general InputStream convention and is
     *         kept for compatibility with existing callers)
     */
    @Override
    public long skip(long n) throws IOException {
        long length = n;
        while (length > 0) {
            if (pos >= len) {
                if ((len = readChunk()) == -1) {
                    return -1;
                }
            }
            // bound by what is still left to skip, not by the original request,
            // otherwise too much is skipped once a chunk boundary was crossed
            int c = (int) Math.min(length, len - pos);
            pos += c;
            length -= c;
        }
        return n;
    }

    /** @return the number of already decompressed bytes left in the current chunk buffer */
    @Override
    public int available() {
        return (len > 0 ? len - pos : 0);
    }

    /** Closes the wrapped stream. */
    @Override
    public void close() throws IOException {
        in.close();
    }

    /**
     * Reads a single chunk from the underlying inputstream.
     *
     * @return the number of decompressed bytes now available in the buffer, or -1 if
     *         the wrapped stream ended
     * @throws IOException if reading from the wrapped stream fails
     * @throws IllegalArgumentException if the chunk header signature bits are not 0x3000
     * @throws IllegalStateException if a raw chunk is truncated or the compressed data would
     *         overrun the chunk buffer
     */
    private int readChunk() throws IOException {
        pos = 0;
        int w = readShort(in);
        if (w == -1) {
            return -1;
        }
        int chunkSize = (w & 0x0FFF) + 1; // plus 3 bytes minus 2 for the length
        if ((w & 0x7000) != 0x3000) {
            // report exactly the bits that were tested
            throw new IllegalArgumentException(String.format("Chunksize header A should be 0x3000, received 0x%04X", w & 0x7000));
        }
        boolean rawChunk = (w & 0x8000) == 0;
        if (rawChunk) {
            // a single read() may return fewer bytes than requested, so fill the buffer in a loop
            int filled = 0;
            while (filled < chunkSize) {
                int r = in.read(buf, filled, chunkSize - filled);
                if (r <= 0) {
                    break;
                }
                filled += r;
            }
            if (filled < chunkSize) {
                throw new IllegalStateException(String.format("Not enough bytes read, expected %d", chunkSize));
            }
            return chunkSize;
        }

        int inOffset = 0;
        int outOffset = 0;
        while (inOffset < chunkSize) {
            // one flag byte announces the kind of each of the next (up to) 8 tokens
            int tokenFlags = in.read();
            inOffset++;
            if (tokenFlags == -1) {
                break;
            }
            for (int n = 0; n < 8; n++) {
                if (inOffset >= chunkSize) {
                    break;
                }
                if ((tokenFlags & POWER2[n]) == 0) {
                    // literal
                    final int b = in.read();
                    if (b == -1) {
                        return -1;
                    }
                    if (outOffset >= buf.length) {
                        throw new IllegalStateException(String.format("Decompressed chunk exceeds %d bytes", buf.length));
                    }
                    buf[outOffset++] = (byte) b;
                    inOffset++;
                } else {
                    // compressed token
                    int token = readShort(in);
                    if (token == -1) {
                        return -1;
                    }
                    inOffset += 2;
                    int copyLenBits = getCopyLenBits(outOffset - 1);
                    int copyOffset = (token >> (copyLenBits)) + 1;
                    int copyLen = (token & (POWER2[copyLenBits] - 1)) + 3;
                    int startPos = outOffset - copyOffset;
                    int endPos = startPos + copyLen;
                    if (startPos < 0 || outOffset + copyLen > buf.length) {
                        throw new IllegalStateException(
                                String.format("Invalid copy token 0x%04X at chunk offset %d", token, outOffset));
                    }
                    // byte-wise forward copy on purpose: source and destination may overlap,
                    // in which case freshly written bytes are read again (run replication)
                    for (int i = startPos; i < endPos; i++) {
                        buf[outOffset++] = buf[i];
                    }
                }
            }
        }
        return outOffset;
    }

    /**
     * Helper method to determine how many bits in the CopyToken are used for the CopyLength.
     *
     * @param offset the index of the last decompressed byte in the current chunk
     * @return the number of low-order token bits holding the copy length (between 4 and 12)
     */
    static int getCopyLenBits(int offset) {
        for (int n = 11; n >= 4; n--) {
            if ((offset & POWER2[n]) != 0) {
                return 15 - n;
            }
        }
        return 12;
    }

    /**
     * Convenience method for read a 2-bytes short in little endian encoding.
     *
     * @return the unsigned value (0..65535), or -1 if the stream ended before 2 bytes were read
     * @throws IOException if reading from the wrapped stream fails
     */
    public int readShort() throws IOException {
        return readShort(this);
    }

    /**
     * Convenience method for read a 4-bytes int in little endian encoding.
     *
     * @return the value read, or -1 if the stream ended before 4 bytes were read; a stored
     *         value of 0xFFFFFFFF therefore cannot be told apart from the end of the stream
     * @throws IOException if reading from the wrapped stream fails
     */
    public int readInt() throws IOException {
        return readInt(this);
    }

    /** Reads a little endian 2-byte value from the given stream, -1 on premature end. */
    private static int readShort(InputStream stream) throws IOException {
        int b0, b1;
        if ((b0 = stream.read()) == -1) {
            return -1;
        }
        if ((b1 = stream.read()) == -1) {
            return -1;
        }
        return (b0 & 0xFF) | ((b1 & 0xFF) << 8);
    }

    /** Reads a little endian 4-byte value from the given stream, -1 on premature end. */
    private static int readInt(InputStream stream) throws IOException {
        int b0, b1, b2, b3;
        if ((b0 = stream.read()) == -1) {
            return -1;
        }
        if ((b1 = stream.read()) == -1) {
            return -1;
        }
        if ((b2 = stream.read()) == -1) {
            return -1;
        }
        if ((b3 = stream.read()) == -1) {
            return -1;
        }
        return (b0 & 0xFF) | ((b1 & 0xFF) << 8) | ((b2 & 0xFF) << 16) | ((b3 & 0xFF) << 24);
    }
}