byte buf[] = new byte[bytes];\r
leis.read(buf);\r
\r
-// FileOutputStream fos = new FileOutputStream("bla16.bmp");\r
-// fos.write(buf);\r
-// fos.close();\r
- \r
- \r
-// BufferedImage bi = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);\r
-// \r
-// int size2 = 0;\r
-// byte buf[] = new byte[widthBytes];\r
-// for (int h=0; h<height; h++) {\r
-// leis.read(buf);\r
-// size2 += widthBytes;\r
-//\r
-// ImageInputStream iis = new MemoryCacheImageInputStream(new ByteArrayInputStream(buf));\r
-//\r
-// for (int w=0; w<width; w++) {\r
-// long bitsAtPixel = iis.readBits(bitsPixel);\r
-// // TODO: is bitsPixel a multiple of 3 (r,g,b)\r
-// // which colortable should be used for the various bit sizes???\r
-// \r
-// }\r
-// }\r
-// \r
-// assert (bytes == size2);\r
-//\r
-// size += size2;\r
- \r
+ // TODO: not implemented yet - if you ever reach this code path, please\r
+ // provide a sample file so that it can be implemented\r
\r
return size;\r
}\r
import java.io.BufferedInputStream;\r
import java.io.ByteArrayInputStream;\r
import java.io.IOException;\r
+import java.io.InputStream;\r
import java.util.ArrayList;\r
import java.util.List;\r
\r
@SuppressWarnings("unused")\r
private Color colorTable[];\r
@SuppressWarnings("unused")\r
- private int colorMaskRed=0,colorMaskGreen=0,colorMaskBlue=0;\r
+ private int colorMaskR=0,colorMaskG=0,colorMaskB=0;\r
\r
// size of header and color table, for start of image data calculation\r
private int introSize;\r
introSize += readColors(leis);\r
assert(introSize < 10000);\r
\r
- int fileSize = (headerImageSize != 0) ? (int)(introSize+headerImageSize) : recordSize;\r
+ int fileSize = (headerImageSize < headerSize) ? recordSize : (int)Math.min(introSize+headerImageSize,recordSize);\r
\r
imageData = new byte[fileSize];\r
leis.reset();\r
return 0;\r
case BI_BITCOUNT_1:\r
// 2 colors\r
- return readRGBQuad(leis, 2);\r
+ return readRGBQuad(leis, (int)(headerColorUsed == 0 ? 2 : Math.min(headerColorUsed,2)));\r
case BI_BITCOUNT_2:\r
// 16 colors\r
- return readRGBQuad(leis, 16);\r
+ return readRGBQuad(leis, (int)(headerColorUsed == 0 ? 16 : Math.min(headerColorUsed,16)));\r
case BI_BITCOUNT_3:\r
// 256 colors\r
- return readRGBQuad(leis, (int)headerColorUsed);\r
- case BI_BITCOUNT_5:\r
- colorMaskRed=0xFF;\r
- colorMaskGreen=0xFF;\r
- colorMaskBlue=0xFF;\r
- return 0;\r
+ return readRGBQuad(leis, (int)(headerColorUsed == 0 ? 256 : Math.min(headerColorUsed,256)));\r
case BI_BITCOUNT_4:\r
- if (headerCompression == Compression.BI_RGB) {\r
- colorMaskBlue = 0x1F;\r
- colorMaskGreen = 0x1F<<5;\r
- colorMaskRed = 0x1F<<10;\r
+ switch (headerCompression) {\r
+ case BI_RGB:\r
+ colorMaskB = 0x1F;\r
+ colorMaskG = 0x1F<<5;\r
+ colorMaskR = 0x1F<<10;\r
return 0;\r
- } else {\r
- assert(headerCompression == Compression.BI_BITFIELDS);\r
- colorMaskBlue = leis.readInt();\r
- colorMaskGreen = leis.readInt();\r
- colorMaskRed = leis.readInt();\r
+ case BI_BITFIELDS:\r
+ colorMaskB = leis.readInt();\r
+ colorMaskG = leis.readInt();\r
+ colorMaskR = leis.readInt();\r
return 3*LittleEndianConsts.INT_SIZE;\r
+ default:\r
+ throw new IOException("Invalid compression option ("+headerCompression+") for bitcount ("+headerBitCount+").");\r
}\r
+ case BI_BITCOUNT_5:\r
case BI_BITCOUNT_6:\r
- if (headerCompression == Compression.BI_RGB) {\r
- colorMaskBlue = colorMaskGreen = colorMaskRed = 0xFF;\r
+ switch (headerCompression) {\r
+ case BI_RGB:\r
+ colorMaskR=0xFF;\r
+ colorMaskG=0xFF;\r
+ colorMaskB=0xFF;\r
return 0;\r
- } else {\r
- assert(headerCompression == Compression.BI_BITFIELDS);\r
- colorMaskBlue = leis.readInt();\r
- colorMaskGreen = leis.readInt();\r
- colorMaskRed = leis.readInt();\r
+ case BI_BITFIELDS:\r
+ colorMaskB = leis.readInt();\r
+ colorMaskG = leis.readInt();\r
+ colorMaskR = leis.readInt();\r
return 3*LittleEndianConsts.INT_SIZE;\r
+ default:\r
+ throw new IOException("Invalid compression option ("+headerCompression+") for bitcount ("+headerBitCount+").");\r
}\r
}\r
}\r
return size;\r
}\r
\r
- public BufferedImage getImage() {\r
+ /**\r
+ * Assembles a complete BMP file in memory: a bitmap file header ("BM" magic,\r
+ * total size, offset of the pixel data) followed by the bytes of imageData.\r
+ *\r
+ * @return a stream over the assembled BMP bytes\r
+ * @throws RecordFormatException if imageData is still null\r
+ */\r
+ public InputStream getBMPStream() {\r
if (imageData == null) {\r
throw new RecordFormatException("bitmap not initialized ... need to call init() before");\r
}\r
\r
+ // sometimes there are missing bytes after the imageData which will be 0-filled\r
+ // NOTE(review): the result is cast down to int - presumably headerImageSize comes\r
+ // straight from the file, so a very large value could overflow here; confirm\r
+ int imageSize = (int)Math.max(imageData.length, introSize+headerImageSize);\r
+ \r
// create the image data and leave the parsing to the ImageIO api\r
- byte buf[] = new byte[BMP_HEADER_SIZE+imageData.length];\r
+ byte buf[] = new byte[BMP_HEADER_SIZE+imageSize];\r
\r
// https://en.wikipedia.org/wiki/BMP_file_format # Bitmap file header\r
buf[0] = (byte)'B';\r
buf[1] = (byte)'M';\r
// the full size of the bmp\r
- LittleEndian.putInt(buf, 2, (int)(BMP_HEADER_SIZE + introSize + headerImageSize));\r
+ LittleEndian.putInt(buf, 2, BMP_HEADER_SIZE+imageSize);\r
// the next 4 bytes are unused\r
LittleEndian.putInt(buf, 6, 0);\r
// start of image = BMP header length + dib header length + color tables length\r
LittleEndian.putInt(buf, 10, BMP_HEADER_SIZE + introSize);\r
- \r
+ // fill the "known" image data\r
System.arraycopy(imageData, 0, buf, BMP_HEADER_SIZE, imageData.length);\r
\r
+ return new ByteArrayInputStream(buf);\r
+ }\r
+ \r
+ /**\r
+ * Decodes the stream returned by getBMPStream() with ImageIO.\r
+ *\r
+ * @return the decoded image\r
+ * @throws RecordFormatException if ImageIO fails with an IOException\r
+ */\r
+ public BufferedImage getImage() {\r
try {\r
- return ImageIO.read(new ByteArrayInputStream(buf));\r
+ return ImageIO.read(getBMPStream());\r
} catch (IOException e) {\r
- // ... shouldn't happen\r
throw new RecordFormatException("invalid bitmap data", e);\r
}\r
}\r
\r
@Override\r
public int init(LittleEndianInputStream leis, long recordSize, int recordFunction) throws IOException {\r
- boolean hasBitmap = (recordSize > ((recordFunction >> 8) + 3));\r
+ boolean hasBitmap = (recordSize/2 != ((recordFunction >> 8) + 3));\r
\r
int size = 0;\r
int rasterOpCode = leis.readUShort();\r
\r
@Override\r
public int init(LittleEndianInputStream leis, long recordSize, int recordFunction) throws IOException {\r
- boolean hasBitmap = (recordSize > ((recordFunction >> 8) + 3));\r
+ boolean hasBitmap = (recordSize/2 != ((recordFunction >> 8) + 3));\r
\r
int size = 0;\r
int rasterOpCode = leis.readUShort();\r
\r
@Override\r
public BufferedImage getImage() {\r
- return target.getImage();\r
+ // without a target bitmap there is nothing to decode: return null instead of\r
+ // failing with a NullPointerException\r
+ return (target == null) ? null : target.getImage();\r
}\r
}\r
\r
\r
@Override\r
public int init(LittleEndianInputStream leis, long recordSize, int recordFunction) throws IOException {\r
+ // -6 bytes of record function and length header\r
+ final int remainingRecordSize = (int)(recordSize-6);\r
+ \r
y = leis.readShort();\r
x = leis.readShort();\r
stringLength = leis.readShort();\r
\r
int size = 4*LittleEndianConsts.SHORT_SIZE;\r
\r
- if (fwOpts != 0) {\r
+ if (fwOpts != 0 && size+8<=remainingRecordSize) {\r
// the bounding rectangle is optional and only read when fwOpts are given\r
left = leis.readShort();\r
top = leis.readShort();\r
text = new String(buf, 0, stringLength, LocaleUtil.CHARSET_1252);\r
size += buf.length;\r
\r
- // -6 bytes of record function and length header\r
- int remainingRecordSize = (int)(recordSize-6);\r
if (size < remainingRecordSize) {\r
if (size + stringLength*LittleEndianConsts.SHORT_SIZE < remainingRecordSize) {\r
throw new RecordFormatException("can't read Dx array - given recordSize doesn't contain enough values for string length "+stringLength);\r
count = leis.readUShort();\r
top = leis.readUShort();\r
bottom = leis.readUShort();\r
- left_scanline = new int[count];\r
- right_scanline = new int[count];\r
- for (int i=0; i*2<count; i++) {\r
+ int size = 3*LittleEndianConsts.SHORT_SIZE;\r
+ left_scanline = new int[count/2];\r
+ right_scanline = new int[count/2];\r
+ for (int i=0; i<count/2; i++) {\r
left_scanline[i] = leis.readUShort();\r
right_scanline[i] = leis.readUShort();\r
+ size += 2*LittleEndianConsts.SHORT_SIZE;\r
}\r
count2 = leis.readUShort();\r
- return 8 + count*4;\r
+ size += LittleEndianConsts.SHORT_SIZE;\r
+ return size;\r
}\r
}\r
\r
+ /**\r
+ * Reads the region header fields and the scan objects that follow them.\r
+ *\r
+ * @return the number of bytes consumed from leis\r
+ */\r
public int init(LittleEndianInputStream leis, long recordSize, int recordFunction) throws IOException {\r
nextInChain = leis.readShort();\r
objectType = leis.readShort();\r
- objectCount = leis.readUShort();\r
+ objectCount = leis.readInt();\r
regionSize = leis.readShort();\r
scanCount = leis.readShort();\r
maxScan = leis.readShort();\r
- bottom = leis.readShort();\r
- right = leis.readShort();\r
- top = leis.readShort();\r
left = leis.readShort();\r
+ top = leis.readShort();\r
+ right = leis.readShort();\r
+ bottom = leis.readShort();\r
+ \r
+ // bytes consumed so far: nine 16-bit fields plus the 32-bit objectCount\r
+ int size = 9*LittleEndianConsts.SHORT_SIZE+LittleEndianConsts.INT_SIZE;\r
+\r
+ // read exactly scanCount scan objects; each init() returns the bytes it consumed\r
+ // NOTE(review): scanCount is read as a signed short - a negative value would make\r
+ // the array allocation below throw NegativeArraySizeException; confirm\r
+ scanObjects = new WmfScanObject[scanCount];\r
+ for (int i=0; i<scanCount; i++) {\r
+ size += (scanObjects[i] = new WmfScanObject()).init(leis);\r
+ }\r
\r
- List<WmfScanObject> soList = new ArrayList<WmfScanObject>();\r
- int scanCountI = 0, size = 0;\r
- do {\r
- WmfScanObject so = new WmfScanObject();\r
- size += so.init(leis);\r
- scanCountI += so.count;\r
- soList.add(so);\r
- } while (scanCountI < scanCount);\r
- scanObjects = soList.toArray(new WmfScanObject[soList.size()]);\r
-\r
- return 20 + size;\r
+ return size;\r
}\r
\r
@Override\r
int remainingSize = (int)(recordSize - consumedSize);\r
assert(remainingSize >= 0);\r
if (remainingSize > 0) {\r
-// byte remaining[] = new byte[remainingSize];\r
-// leis.read(remaining);\r
-// FileOutputStream fos = new FileOutputStream("remaining.dat");\r
-// fos.write(remaining);\r
-// fos.close();\r
- leis.skip(remainingSize);\r
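+ // skip in a loop, because a single skip() call may skip fewer bytes than requested\r
+ // FIXME: skip() may also return 0 (e.g. at end of stream), in which case the loop below never terminates\r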
+ for (int i=remainingSize; i>0; i-=leis.skip(i));\r
}\r
}\r
}\r
import static org.junit.Assert.assertEquals;\r
\r
import java.awt.image.BufferedImage;\r
-import java.io.ByteArrayInputStream;\r
import java.io.File;\r
import java.io.FileFilter;\r
import java.io.FileInputStream;\r
import java.io.FileOutputStream;\r
+import java.io.FilterInputStream;\r
import java.io.IOException;\r
+import java.net.URL;\r
import java.util.List;\r
import java.util.Locale;\r
+import java.util.zip.ZipEntry;\r
+import java.util.zip.ZipInputStream;\r
\r
import javax.imageio.ImageIO;\r
\r
\r
@Test\r
@Ignore\r
- public void extract() throws IOException {\r
- File dir = new File("test-data/slideshow");\r
- File files[] = dir.listFiles(new FileFilter() {\r
- public boolean accept(File pathname) {\r
- return pathname.getName().matches("(?i).*\\.pptx?$");\r
- }\r
- });\r
-\r
- boolean outputFiles = false;\r
-\r
+ // Manual helper (hence @Ignore): downloads the govdocs1 ppt archive and writes every\r
+ // embedded WMF picture to build/ppt as <presentation>-<index>.wmf\r
+ public void fetchWmfFromGovdocs() throws IOException {\r
+ URL url = new URL("http://digitalcorpora.org/corpora/files/govdocs1/by_type/ppt.zip");\r
File outdir = new File("build/ppt");\r
- if (outputFiles) {\r
- outdir.mkdirs();\r
- }\r
- int wmfIdx = 1;\r
- for (File f : files) {\r
+ outdir.mkdirs();\r
+ ZipInputStream zis = new ZipInputStream(url.openStream());\r
+ try {\r
+ ZipEntry ze;\r
+ while ((ze = zis.getNextEntry()) != null) {\r
+ if (ze.isDirectory()) continue;\r
+ // entries are presentations (not .wmf files) and may carry a directory prefix:\r
+ // keep only the plain file name without the .ppt extension\r
+ String basename = ze.getName().replaceAll(".*/", "").replaceAll("(?i)\\.ppt$", "");\r
+ // the slideshow must not close the shared zip stream when it is done with one entry\r
+ FilterInputStream fis = new FilterInputStream(zis){\r
+ public void close() throws IOException {}\r
+ };\r
try {\r
- SlideShow<?,?> ss = SlideShowFactory.create(f);\r
+ SlideShow<?,?> ss = SlideShowFactory.create(fis);\r
+ int wmfIdx = 1;\r
for (PictureData pd : ss.getPictureData()) {\r
if (pd.getType() != PictureType.WMF) continue;\r
byte wmfData[] = pd.getData();\r
- if (outputFiles) {\r
- String filename = String.format(Locale.ROOT, "pic%04d.wmf", wmfIdx);\r
- FileOutputStream fos = new FileOutputStream(new File(outdir, filename));\r
- fos.write(wmfData);\r
- fos.close();\r
- }\r
-\r
- HwmfPicture wmf = new HwmfPicture(new ByteArrayInputStream(wmfData));\r
-\r
- int bmpIndex = 1;\r
- for (HwmfRecord r : wmf.getRecords()) {\r
- if (r instanceof HwmfImageRecord) {\r
- BufferedImage bi = ((HwmfImageRecord)r).getImage();\r
- if (outputFiles) {\r
- String filename = String.format(Locale.ROOT, "pic%04d-%04d.png", wmfIdx, bmpIndex);\r
- ImageIO.write(bi, "PNG", new File(outdir, filename));\r
- }\r
- bmpIndex++;\r
- }\r
- }\r
-\r
+ String filename = String.format(Locale.ROOT, "%s-%04d.wmf", basename, wmfIdx);\r
+ FileOutputStream fos = new FileOutputStream(new File(outdir, filename));\r
+ try {\r
+ fos.write(wmfData);\r
+ } finally {\r
+ // release the file handle even if the write fails\r
+ fos.close();\r
+ }\r
wmfIdx++;\r
}\r
ss.close();\r
} catch (Exception e) {\r
- System.out.println(f+" ignored.");\r
+ System.out.println(ze.getName()+" ignored.");\r
+ }\r
+ }\r
+ } finally {\r
+ // always release the download stream, also when reading the archive fails\r
+ zis.close();\r
+ }\r
+ }\r
+ \r
+ @Test\r
+ @Ignore\r
+ // Manual helper (hence @Ignore): parses every .wmf file in build/ppt and, if outputFiles\r
+ // is set, writes the bitmaps embedded in image records as <wmf name>-<index>.png\r
+ public void parseWmfs() throws IOException {\r
+ boolean outputFiles = false;\r
+ File indir = new File("build/ppt"), outdir = indir;\r
+ final String startFile = "";\r
+ File files[] = indir.listFiles(new FileFilter() {\r
+ boolean foundStartFile = false;\r
+ public boolean accept(File pathname) {\r
+ foundStartFile |= startFile.isEmpty() || pathname.getName().contains(startFile);\r
+ return foundStartFile && pathname.getName().matches("(?i).*\\.wmf$");\r
+ }\r
+ });\r
+ if (files == null) {\r
+ // listFiles() returns null when the directory is missing or unreadable\r
+ System.out.println(indir+" not found - nothing to parse.");\r
+ return;\r
+ }\r
+ for (File f : files) {\r
+ try {\r
+ String basename = f.getName().replaceAll("(?i)\\.wmf$", "");\r
+ FileInputStream fis = new FileInputStream(f);\r
+ HwmfPicture wmf;\r
+ try {\r
+ wmf = new HwmfPicture(fis);\r
+ } finally {\r
+ // release the file handle even if parsing fails\r
+ fis.close();\r
+ }\r
+ \r
+ int bmpIndex = 1;\r
+ for (HwmfRecord r : wmf.getRecords()) {\r
+ if (r instanceof HwmfImageRecord) {\r
+ BufferedImage bi = ((HwmfImageRecord)r).getImage();\r
+ if (bi != null && outputFiles) {\r
+ String filename = String.format(Locale.ROOT, "%s-%04d.png", basename, bmpIndex);\r
+ ImageIO.write(bi, "PNG", new File(outdir, filename));\r
+ }\r
+ bmpIndex++;\r
+ }\r
+ }\r
+ } catch (Exception e) {\r
+ System.out.println(f.getName()+" ignored.");\r
}\r
}\r
}\r
-\r
}\r