import static org.apache.poi.poifs.common.POIFSConstants.OOXML_FILE_HEADER;
import static org.apache.poi.poifs.common.POIFSConstants.RAW_XML_FILE_HEADER;
-import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Arrays;
import org.apache.poi.poifs.storage.HeaderBlockConstants;
import org.apache.poi.util.IOUtils;
/** UNKNOWN magic */
UNKNOWN(new byte[0]);
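+ // number of leading bytes read from a file or stream before matching;
+ // it must cover the longest magic pattern, so update this if a longer pattern is added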
+ final static int MAX_PATTERN_LENGTH = 12;
+
final byte[][] magic;
FileMagic(long magic) {
public static FileMagic valueOf(byte[] magic) {
for (FileMagic fm : values()) {
for (byte[] ma : fm.magic) {
+ // don't try to match if the given byte-array is too short
+ // for this pattern anyway
+ if(magic.length < ma.length) {
+ continue;
+ }
+
if (findMagic(ma, magic)) {
return fm;
}
*/
public static FileMagic valueOf(final File inp) throws IOException {
    try (FileInputStream fis = new FileInputStream(inp)) {
-       final byte[] data = IOUtils.toByteArray(fis, 8);
+       // read as many bytes as possible, up to the required number of bytes
+       byte[] data = new byte[MAX_PATTERN_LENGTH];
+       int read = IOUtils.readFully(fis, data, 0, MAX_PATTERN_LENGTH);
+
+       // readFully() reports "nothing could be read" (e.g. an empty file) as -1;
+       // a negative length must never reach Arrays.copyOf(), which would throw
+       // a NegativeArraySizeException, so such a file is simply UNKNOWN
+       if (read < 0) {
+           return FileMagic.UNKNOWN;
+       }
+
+       // only use the bytes that could be read
+       data = Arrays.copyOf(data, read);
+
        return FileMagic.valueOf(data);
    }
}
throw new IOException("getFileMagic() only operates on streams which support mark(int)");
}
- // Grab the first 8 bytes
- byte[] data = IOUtils.peekFirst8Bytes(inp);
+ // Grab the first bytes of this stream
+ byte[] data = IOUtils.peekFirstNBytes(inp, MAX_PATTERN_LENGTH);
return FileMagic.valueOf(data);
}
import org.junit.Test;
import java.io.BufferedInputStream;
+import java.io.File;
import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Random;
import static org.junit.Assert.*;
assertEquals(FileMagic.HTML, FileMagic.valueOf("\r\n<html".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("\r<html".getBytes(Charsets.UTF_8)));
+ assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xDB }));
+ assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xE0, 'a', 'b', 'J', 'F', 'I', 'F', 0x00, 0x01 }));
+ assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xEE }));
+ assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xE1, 'd', 'c', 'E', 'x', 'i', 'f', 0x00, 0x00 }));
+
+ assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("something".getBytes(Charsets.UTF_8)));
+ assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(new byte[0]));
+
try {
FileMagic.valueOf("some string");
fail("Should catch exception here");
assertNotSame(stream, FileMagic.prepareToCheckMagic(stream));
}
}
+
+ @Test
+ public void testMatchingButTooLessData() {
+     // starts like a JPEG but ends before the pattern is complete;
+     // input like this previously caused an exception
+     final byte[] truncatedJpeg = {
+         (byte) 0xFF, (byte) 0xD8, (byte) 0xFF, (byte) 0xE0, 0x00, 0x10, 'J', 'F'
+     };
+
+     final FileMagic detected = FileMagic.valueOf(truncatedJpeg);
+     assertEquals(FileMagic.UNKNOWN, detected);
+ }
+
+ @Test
+ public void testShortFile() throws IOException {
+     // a file holding fewer than 8 bytes previously caused an exception
+     final byte[] fiveBytes = { (byte) 0xFF, (byte) 0xD8, (byte) 0xFF, (byte) 0xE0, 0x00 };
+     final File tempFile = File.createTempFile("TestFileMagic", ".bin");
+
+     try {
+         try (FileOutputStream out = new FileOutputStream(tempFile)) {
+             out.write(fiveBytes);
+         }
+
+         final FileMagic detected = FileMagic.valueOf(tempFile);
+         assertEquals(FileMagic.UNKNOWN, detected);
+     } finally {
+         // clean up the temporary file whatever the outcome above
+         assertTrue(tempFile.delete());
+     }
+ }
+
+ @Test
+ public void testMarkRequired() throws IOException {
+     byte[] data = new byte[] { -1, -40, -1, -32, 0 };
+
+     File file = File.createTempFile("TestFileMagic", ".bin");
+     try {
+         try (FileOutputStream fos = new FileOutputStream(file)) {
+             fos.write(data);
+         }
+
+         // a FileInputStream does not support "marking"
+         try (FileInputStream str = new FileInputStream(file)) {
+             assertFalse(str.markSupported());
+
+             // expect the IOException from this call only, so that an
+             // IOException raised while preparing the temporary file cannot
+             // make the test pass by accident
+             try {
+                 FileMagic.valueOf(str);
+                 fail("Expected an IOException for a stream without mark support");
+             } catch (IOException expected) {
+                 // expected: valueOf(InputStream) rejects streams that cannot be marked
+             }
+         }
+     } finally {
+         assertTrue(file.delete());
+     }
+ }
+
+ @Test
+ public void testPatterns() {
+     // stress the detection with uniform byte patterns: for every possible
+     // byte value, fill the buffer one position at a time and check after
+     // each step that nothing is recognized
+     for (int value = 0; value < 256; value++) {
+         final byte[] data = new byte[12];
+         int filled = 0;
+         while (filled < data.length) {
+             data[filled++] = (byte) value;
+
+             assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(data));
+         }
+     }
+ }
+
+ @Test
+ public void testRandomPatterns() {
+     final Random rnd = new Random();
+
+     // stress the detection with 1000 random 12-byte patterns
+     for (int round = 1000; round > 0; round--) {
+         final byte[] data = new byte[12];
+         rnd.nextBytes(data);
+
+         final FileMagic detected;
+         try {
+             detected = FileMagic.valueOf(data);
+         } catch (Exception e) {
+             // report the offending bytes, as the random input cannot be reproduced otherwise
+             throw new IllegalStateException("Failed with pattern " + Arrays.toString(data), e);
+         }
+
+         // UNKNOWN cannot be asserted here, random data may hit a valid pattern
+         assertNotNull(detected);
+     }
+ }
}