You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

EmbeddedObjectRefSubRecord.java 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hssf.record;
  16. import java.io.ByteArrayInputStream;
  17. import org.apache.poi.hssf.record.formula.Area3DPtg;
  18. import org.apache.poi.hssf.record.formula.AreaPtg;
  19. import org.apache.poi.hssf.record.formula.Ptg;
  20. import org.apache.poi.hssf.record.formula.Ref3DPtg;
  21. import org.apache.poi.hssf.record.formula.RefPtg;
  22. import org.apache.poi.util.HexDump;
  23. import org.apache.poi.util.LittleEndian;
  24. import org.apache.poi.util.LittleEndianInput;
  25. import org.apache.poi.util.LittleEndianInputStream;
  26. import org.apache.poi.util.LittleEndianOutput;
  27. import org.apache.poi.util.StringUtil;
  28. /**
  29. * ftPictFmla (0x0009)<br/>
  30. * A sub-record within the OBJ record which stores a reference to an object
  31. * stored in a separate entry within the OLE2 compound file.
  32. *
  33. * @author Daniel Noll
  34. */
  35. public final class EmbeddedObjectRefSubRecord extends SubRecord {
  36. public static final short sid = 0x0009;
  37. private static final byte[] EMPTY_BYTE_ARRAY = { };
  38. private int field_1_unknown_int;
  39. /** either an area or a cell ref */
  40. private Ptg field_2_refPtg;
  41. /** for when the 'formula' doesn't parse properly */
  42. private byte[] field_2_unknownFormulaData;
  43. /** note- this byte is not present in the encoding if the string length is zero */
  44. private boolean field_3_unicode_flag; // Flags whether the string is Unicode.
  45. private String field_4_ole_classname; // Classname of the embedded OLE document (e.g. Word.Document.8)
  46. /** Formulas often have a single non-zero trailing byte.
  47. * This is in a similar position to he pre-streamId padding
  48. * It is unknown if the value is important (it seems to mirror a value a few bytes earlier)
  49. * */
  50. private Byte field_4_unknownByte;
  51. private Integer field_5_stream_id; // ID of the OLE stream containing the actual data.
  52. private byte[] field_6_unknown;
  53. // currently for testing only - needs review
  54. EmbeddedObjectRefSubRecord() {
  55. field_2_unknownFormulaData = new byte[] { 0x02, 0x6C, 0x6A, 0x16, 0x01, }; // just some sample data. These values vary a lot
  56. field_6_unknown = EMPTY_BYTE_ARRAY;
  57. field_4_ole_classname = null;
  58. }
  59. public short getSid() {
  60. return sid;
  61. }
  62. public EmbeddedObjectRefSubRecord(LittleEndianInput in, int size) {
  63. // Much guess-work going on here due to lack of any documentation.
  64. // See similar source code in OOO:
  65. // http://svn.services.openoffice.org/ooo/trunk/sc/source/filter/excel/xiescher.cxx
  66. // 1223 void XclImpOleObj::ReadPictFmla( XclImpStream& rStrm, sal_uInt16 nRecSize )
  67. int streamIdOffset = in.readShort(); // OOO calls this 'nFmlaLen'
  68. int remaining = size - LittleEndian.SHORT_SIZE;
  69. int dataLenAfterFormula = remaining - streamIdOffset;
  70. int formulaSize = in.readUShort();
  71. remaining -= LittleEndian.SHORT_SIZE;
  72. field_1_unknown_int = in.readInt();
  73. remaining -= LittleEndian.INT_SIZE;
  74. byte[] formulaRawBytes = readRawData(in, formulaSize);
  75. remaining -= formulaSize;
  76. field_2_refPtg = readRefPtg(formulaRawBytes);
  77. if (field_2_refPtg == null) {
  78. // common case
  79. // field_2_n16 seems to be 5 here
  80. // The formula almost looks like tTbl but the row/column values seem like garbage.
  81. field_2_unknownFormulaData = formulaRawBytes;
  82. } else {
  83. field_2_unknownFormulaData = null;
  84. }
  85. int stringByteCount;
  86. if (remaining >= dataLenAfterFormula + 3) {
  87. int tag = in.readByte();
  88. stringByteCount = LittleEndian.BYTE_SIZE;
  89. if (tag != 0x03) {
  90. throw new RecordFormatException("Expected byte 0x03 here");
  91. }
  92. int nChars = in.readUShort();
  93. stringByteCount += LittleEndian.SHORT_SIZE;
  94. if (nChars > 0) {
  95. // OOO: the 4th way Xcl stores a unicode string: not even a Grbit byte present if length 0
  96. field_3_unicode_flag = ( in.readByte() & 0x01 ) != 0;
  97. stringByteCount += LittleEndian.BYTE_SIZE;
  98. if (field_3_unicode_flag) {
  99. field_4_ole_classname = StringUtil.readUnicodeLE(in, nChars);
  100. stringByteCount += nChars * 2;
  101. } else {
  102. field_4_ole_classname = StringUtil.readCompressedUnicode(in, nChars);
  103. stringByteCount += nChars;
  104. }
  105. } else {
  106. field_4_ole_classname = "";
  107. }
  108. } else {
  109. field_4_ole_classname = null;
  110. stringByteCount = 0;
  111. }
  112. remaining -= stringByteCount;
  113. // Pad to next 2-byte boundary
  114. if (((stringByteCount + formulaSize) % 2) != 0) {
  115. int b = in.readByte();
  116. remaining -= LittleEndian.BYTE_SIZE;
  117. if (field_2_refPtg != null && field_4_ole_classname == null) {
  118. field_4_unknownByte = Byte.valueOf((byte)b);
  119. }
  120. }
  121. int nUnexpectedPadding = remaining - dataLenAfterFormula;
  122. if (nUnexpectedPadding > 0) {
  123. System.err.println("Discarding " + nUnexpectedPadding + " unexpected padding bytes ");
  124. readRawData(in, nUnexpectedPadding);
  125. remaining-=nUnexpectedPadding;
  126. }
  127. // Fetch the stream ID
  128. if (dataLenAfterFormula >= 4) {
  129. field_5_stream_id = Integer.valueOf(in.readInt());
  130. remaining -= LittleEndian.INT_SIZE;
  131. } else {
  132. field_5_stream_id = null;
  133. }
  134. field_6_unknown = readRawData(in, remaining);
  135. }
  136. private static Ptg readRefPtg(byte[] formulaRawBytes) {
  137. LittleEndianInput in = new LittleEndianInputStream(new ByteArrayInputStream(formulaRawBytes));
  138. byte ptgSid = in.readByte();
  139. switch(ptgSid) {
  140. case AreaPtg.sid: return new AreaPtg(in);
  141. case Area3DPtg.sid: return new Area3DPtg(in);
  142. case RefPtg.sid: return new RefPtg(in);
  143. case Ref3DPtg.sid: return new Ref3DPtg(in);
  144. }
  145. return null;
  146. }
  147. private static byte[] readRawData(LittleEndianInput in, int size) {
  148. if (size < 0) {
  149. throw new IllegalArgumentException("Negative size (" + size + ")");
  150. }
  151. if (size == 0) {
  152. return EMPTY_BYTE_ARRAY;
  153. }
  154. byte[] result = new byte[size];
  155. in.readFully(result);
  156. return result;
  157. }
  158. private int getStreamIDOffset(int formulaSize) {
  159. int result = 2 + 4; // formulaSize + f2unknown_int
  160. result += formulaSize;
  161. int stringLen;
  162. if (field_4_ole_classname == null) {
  163. // don't write 0x03, stringLen, flag, text
  164. stringLen = 0;
  165. } else {
  166. result += 1 + 2; // 0x03, stringLen
  167. stringLen = field_4_ole_classname.length();
  168. if (stringLen > 0) {
  169. result += 1; // flag
  170. if (field_3_unicode_flag) {
  171. result += stringLen * 2;
  172. } else {
  173. result += stringLen;
  174. }
  175. }
  176. }
  177. // pad to next 2 byte boundary
  178. if ((result % 2) != 0) {
  179. result ++;
  180. }
  181. return result;
  182. }
  183. private int getDataSize(int idOffset) {
  184. int result = 2 + idOffset; // 2 for idOffset short field itself
  185. if (field_5_stream_id != null) {
  186. result += 4;
  187. }
  188. return result + field_6_unknown.length;
  189. }
  190. protected int getDataSize() {
  191. int formulaSize = field_2_refPtg == null ? field_2_unknownFormulaData.length : field_2_refPtg.getSize();
  192. int idOffset = getStreamIDOffset(formulaSize);
  193. return getDataSize(idOffset);
  194. }
  195. public void serialize(LittleEndianOutput out) {
  196. int formulaSize = field_2_refPtg == null ? field_2_unknownFormulaData.length : field_2_refPtg.getSize();
  197. int idOffset = getStreamIDOffset(formulaSize);
  198. int dataSize = getDataSize(idOffset);
  199. out.writeShort(sid);
  200. out.writeShort(dataSize);
  201. out.writeShort(idOffset);
  202. out.writeShort(formulaSize);
  203. out.writeInt(field_1_unknown_int);
  204. int pos = 12;
  205. if (field_2_refPtg == null) {
  206. out.write(field_2_unknownFormulaData);
  207. } else {
  208. field_2_refPtg.write(out);
  209. }
  210. pos += formulaSize;
  211. int stringLen;
  212. if (field_4_ole_classname == null) {
  213. // don't write 0x03, stringLen, flag, text
  214. stringLen = 0;
  215. } else {
  216. out.writeByte(0x03);
  217. pos+=1;
  218. stringLen = field_4_ole_classname.length();
  219. out.writeShort(stringLen);
  220. pos+=2;
  221. if (stringLen > 0) {
  222. out.writeByte(field_3_unicode_flag ? 0x01 : 0x00);
  223. pos+=1;
  224. if (field_3_unicode_flag) {
  225. StringUtil.putUnicodeLE(field_4_ole_classname, out);
  226. pos += stringLen * 2;
  227. } else {
  228. StringUtil.putCompressedUnicode(field_4_ole_classname, out);
  229. pos += stringLen;
  230. }
  231. }
  232. }
  233. // pad to next 2-byte boundary (requires 0 or 1 bytes)
  234. switch(idOffset - (pos - 6)) { // 6 for 3 shorts: sid, dataSize, idOffset
  235. case 1:
  236. out.writeByte(field_4_unknownByte == null ? 0x00 : field_4_unknownByte.intValue());
  237. pos ++;
  238. case 0:
  239. break;
  240. default:
  241. throw new IllegalStateException("Bad padding calculation (" + idOffset + ", " + pos + ")");
  242. }
  243. if (field_5_stream_id != null) {
  244. out.writeInt(field_5_stream_id.intValue());
  245. pos += 4;
  246. }
  247. out.write(field_6_unknown);
  248. }
  249. /**
  250. * Gets the stream ID containing the actual data. The data itself
  251. * can be found under a top-level directory entry in the OLE2 filesystem
  252. * under the name "MBD<var>xxxxxxxx</var>" where <var>xxxxxxxx</var> is
  253. * this ID converted into hex (in big endian order, funnily enough.)
  254. *
  255. * @return the data stream ID. Possibly <code>null</code>
  256. */
  257. public Integer getStreamId() {
  258. return field_5_stream_id;
  259. }
  260. public String getOLEClassName() {
  261. return field_4_ole_classname;
  262. }
  263. public byte[] getObjectData() {
  264. return field_6_unknown;
  265. }
  266. public Object clone() {
  267. return this; // TODO proper clone
  268. }
  269. public String toString() {
  270. StringBuffer sb = new StringBuffer();
  271. sb.append("[ftPictFmla]\n");
  272. sb.append(" .f2unknown = ").append(HexDump.intToHex(field_1_unknown_int)).append("\n");
  273. if (field_2_refPtg == null) {
  274. sb.append(" .f3unknown = ").append(HexDump.toHex(field_2_unknownFormulaData)).append("\n");
  275. } else {
  276. sb.append(" .formula = ").append(field_2_refPtg.toString()).append("\n");
  277. }
  278. if (field_4_ole_classname != null) {
  279. sb.append(" .unicodeFlag = ").append(field_3_unicode_flag).append("\n");
  280. sb.append(" .oleClassname = ").append(field_4_ole_classname).append("\n");
  281. }
  282. if (field_4_unknownByte != null) {
  283. sb.append(" .f4unknown = ").append(HexDump.byteToHex(field_4_unknownByte.intValue())).append("\n");
  284. }
  285. if (field_5_stream_id != null) {
  286. sb.append(" .streamId = ").append(HexDump.intToHex(field_5_stream_id.intValue())).append("\n");
  287. }
  288. if (field_6_unknown.length > 0) {
  289. sb.append(" .f7unknown = ").append(HexDump.toHex(field_6_unknown)).append("\n");
  290. }
  291. sb.append("[/ftPictFmla]");
  292. return sb.toString();
  293. }
  294. }