You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

EmbeddedObjectRefSubRecord.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hssf.record;
  16. import java.io.ByteArrayInputStream;
  17. import java.util.Map;
  18. import java.util.function.Supplier;
  19. import org.apache.logging.log4j.LogManager;
  20. import org.apache.logging.log4j.Logger;
  21. import org.apache.poi.ss.formula.ptg.Area3DPtg;
  22. import org.apache.poi.ss.formula.ptg.AreaPtg;
  23. import org.apache.poi.ss.formula.ptg.Ptg;
  24. import org.apache.poi.ss.formula.ptg.Ref3DPtg;
  25. import org.apache.poi.ss.formula.ptg.RefPtg;
  26. import org.apache.poi.util.GenericRecordUtil;
  27. import org.apache.poi.util.IOUtils;
  28. import org.apache.poi.util.LittleEndianConsts;
  29. import org.apache.poi.util.LittleEndianInput;
  30. import org.apache.poi.util.LittleEndianInputStream;
  31. import org.apache.poi.util.LittleEndianOutput;
  32. import org.apache.poi.util.RecordFormatException;
  33. import org.apache.poi.util.StringUtil;
  34. import static org.apache.logging.log4j.util.Unbox.box;
  35. /**
  36. * ftPictFmla (0x0009)<p>
  37. * A sub-record within the OBJ record which stores a reference to an object
  38. * stored in a separate entry within the OLE2 compound file.
  39. */
  40. public final class EmbeddedObjectRefSubRecord extends SubRecord {
  41. private static final Logger LOG = LogManager.getLogger(EmbeddedObjectRefSubRecord.class);
  42. //arbitrarily selected; may need to increase
  43. private static final int MAX_RECORD_LENGTH = 100_000;
  44. public static final short sid = 0x0009;
  45. private static final byte[] EMPTY_BYTE_ARRAY = { };
  46. private int field_1_unknown_int;
  47. /** either an area or a cell ref */
  48. private Ptg field_2_refPtg;
  49. /** for when the 'formula' doesn't parse properly */
  50. private byte[] field_2_unknownFormulaData;
  51. /** note- this byte is not present in the encoding if the string length is zero */
  52. private boolean field_3_unicode_flag; // Flags whether the string is Unicode.
  53. private String field_4_ole_classname; // Classname of the embedded OLE document (e.g. Word.Document.8)
  54. /** Formulas often have a single non-zero trailing byte.
  55. * This is in a similar position to he pre-streamId padding
  56. * It is unknown if the value is important (it seems to mirror a value a few bytes earlier)
  57. * */
  58. private Byte field_4_unknownByte;
  59. private Integer field_5_stream_id; // ID of the OLE stream containing the actual data.
  60. private byte[] field_6_unknown;
  61. // currently for testing only - needs review
  62. public EmbeddedObjectRefSubRecord() {
  63. field_2_unknownFormulaData = new byte[] { 0x02, 0x6C, 0x6A, 0x16, 0x01, }; // just some sample data. These values vary a lot
  64. field_6_unknown = EMPTY_BYTE_ARRAY;
  65. field_4_ole_classname = null;
  66. }
  67. public EmbeddedObjectRefSubRecord(EmbeddedObjectRefSubRecord other) {
  68. super(other);
  69. field_1_unknown_int = other.field_1_unknown_int;
  70. field_2_refPtg = (other.field_2_refPtg == null) ? null : other.field_2_refPtg.copy();
  71. field_2_unknownFormulaData = (other.field_2_unknownFormulaData == null) ? null : other.field_2_unknownFormulaData.clone();
  72. field_3_unicode_flag = other.field_3_unicode_flag;
  73. field_4_ole_classname = other.field_4_ole_classname;
  74. field_4_unknownByte = other.field_4_unknownByte;
  75. field_5_stream_id = other.field_5_stream_id;
  76. field_6_unknown = (other.field_6_unknown == null) ? null : other.field_6_unknown.clone();
  77. }
  78. public EmbeddedObjectRefSubRecord(LittleEndianInput in, int size) {
  79. this(in,size,-1);
  80. }
  81. EmbeddedObjectRefSubRecord(LittleEndianInput in, int size, int cmoOt) {
  82. // Much guess-work going on here due to lack of any documentation.
  83. // See similar source code in OOO:
  84. // http://svn.services.openoffice.org/ooo/trunk/sc/source/filter/excel/xiescher.cxx
  85. // 1223 void XclImpOleObj::ReadPictFmla( XclImpStream& rStrm, sal_uInt16 nRecSize )
  86. int streamIdOffset = in.readShort(); // OOO calls this 'nFmlaLen'
  87. int remaining = size - LittleEndianConsts.SHORT_SIZE;
  88. int dataLenAfterFormula = remaining - streamIdOffset;
  89. int formulaSize = in.readUShort();
  90. remaining -= LittleEndianConsts.SHORT_SIZE;
  91. field_1_unknown_int = in.readInt();
  92. remaining -= LittleEndianConsts.INT_SIZE;
  93. byte[] formulaRawBytes = readRawData(in, formulaSize);
  94. remaining -= formulaSize;
  95. field_2_refPtg = readRefPtg(formulaRawBytes);
  96. if (field_2_refPtg == null) {
  97. // common case
  98. // field_2_n16 seems to be 5 here
  99. // The formula almost looks like tTbl but the row/column values seem like garbage.
  100. field_2_unknownFormulaData = formulaRawBytes;
  101. } else {
  102. field_2_unknownFormulaData = null;
  103. }
  104. int stringByteCount;
  105. if (remaining >= dataLenAfterFormula + 3) {
  106. int tag = in.readByte();
  107. stringByteCount = LittleEndianConsts.BYTE_SIZE;
  108. if (tag != 0x03) {
  109. throw new RecordFormatException("Expected byte 0x03 here");
  110. }
  111. int nChars = in.readUShort();
  112. stringByteCount += LittleEndianConsts.SHORT_SIZE;
  113. if (nChars > 0) {
  114. // OOO: the 4th way Xcl stores a unicode string: not even a Grbit byte present if length 0
  115. field_3_unicode_flag = ( in.readByte() & 0x01 ) != 0;
  116. stringByteCount += LittleEndianConsts.BYTE_SIZE;
  117. if (field_3_unicode_flag) {
  118. field_4_ole_classname = StringUtil.readUnicodeLE(in, nChars);
  119. stringByteCount += nChars * 2;
  120. } else {
  121. field_4_ole_classname = StringUtil.readCompressedUnicode(in, nChars);
  122. stringByteCount += nChars;
  123. }
  124. } else {
  125. field_4_ole_classname = "";
  126. }
  127. } else {
  128. field_4_ole_classname = null;
  129. stringByteCount = 0;
  130. }
  131. remaining -= stringByteCount;
  132. // Pad to next 2-byte boundary
  133. if (((stringByteCount + formulaSize) % 2) != 0) {
  134. int b = in.readByte();
  135. remaining -= LittleEndianConsts.BYTE_SIZE;
  136. if (field_2_refPtg != null && field_4_ole_classname == null) {
  137. field_4_unknownByte = (byte)b;
  138. }
  139. }
  140. int nUnexpectedPadding = remaining - dataLenAfterFormula;
  141. if (nUnexpectedPadding > 0) {
  142. LOG.atError().log("Discarding {} unexpected padding bytes", box(nUnexpectedPadding));
  143. readRawData(in, nUnexpectedPadding);
  144. remaining-=nUnexpectedPadding;
  145. }
  146. // Fetch the stream ID
  147. if (dataLenAfterFormula >= 4) {
  148. field_5_stream_id = in.readInt();
  149. remaining -= LittleEndianConsts.INT_SIZE;
  150. } else {
  151. field_5_stream_id = null;
  152. }
  153. field_6_unknown = readRawData(in, remaining);
  154. }
  155. public short getSid() {
  156. return sid;
  157. }
  158. private static Ptg readRefPtg(byte[] formulaRawBytes) {
  159. LittleEndianInput in = new LittleEndianInputStream(new ByteArrayInputStream(formulaRawBytes));
  160. byte ptgSid = in.readByte();
  161. switch(ptgSid) {
  162. case AreaPtg.sid: return new AreaPtg(in);
  163. case Area3DPtg.sid: return new Area3DPtg(in);
  164. case RefPtg.sid: return new RefPtg(in);
  165. case Ref3DPtg.sid: return new Ref3DPtg(in);
  166. }
  167. return null;
  168. }
  169. private static byte[] readRawData(LittleEndianInput in, int size) {
  170. if (size < 0) {
  171. throw new IllegalArgumentException("Negative size (" + size + ")");
  172. }
  173. if (size == 0) {
  174. return EMPTY_BYTE_ARRAY;
  175. }
  176. byte[] result = IOUtils.safelyAllocate(size, MAX_RECORD_LENGTH);
  177. in.readFully(result);
  178. return result;
  179. }
  180. private int getStreamIDOffset(int formulaSize) {
  181. int result = 2 + 4; // formulaSize + f2unknown_int
  182. result += formulaSize;
  183. // don't write 0x03, stringLen, flag, text
  184. if (field_4_ole_classname != null) {
  185. result += 1 + 2; // 0x03, stringLen
  186. int stringLen = field_4_ole_classname.length();
  187. if (stringLen > 0) {
  188. result += 1; // flag
  189. if (field_3_unicode_flag) {
  190. result += stringLen * 2;
  191. } else {
  192. result += stringLen;
  193. }
  194. }
  195. }
  196. // pad to next 2 byte boundary
  197. if ((result % 2) != 0) {
  198. result ++;
  199. }
  200. return result;
  201. }
  202. private int getDataSize(int idOffset) {
  203. int result = 2 + idOffset; // 2 for idOffset short field itself
  204. if (field_5_stream_id != null) {
  205. result += 4;
  206. }
  207. return result + field_6_unknown.length;
  208. }
  209. protected int getDataSize() {
  210. int formulaSize = field_2_refPtg == null ? field_2_unknownFormulaData.length : field_2_refPtg.getSize();
  211. int idOffset = getStreamIDOffset(formulaSize);
  212. return getDataSize(idOffset);
  213. }
  214. public void serialize(LittleEndianOutput out) {
  215. int formulaSize = field_2_refPtg == null ? field_2_unknownFormulaData.length : field_2_refPtg.getSize();
  216. int idOffset = getStreamIDOffset(formulaSize);
  217. int dataSize = getDataSize(idOffset);
  218. out.writeShort(sid);
  219. out.writeShort(dataSize);
  220. out.writeShort(idOffset);
  221. out.writeShort(formulaSize);
  222. out.writeInt(field_1_unknown_int);
  223. int pos = 12;
  224. if (field_2_refPtg == null) {
  225. out.write(field_2_unknownFormulaData);
  226. } else {
  227. field_2_refPtg.write(out);
  228. }
  229. pos += formulaSize;
  230. // don't write 0x03, stringLen, flag, text
  231. if (field_4_ole_classname != null) {
  232. out.writeByte(0x03);
  233. pos+=1;
  234. int stringLen = field_4_ole_classname.length();
  235. out.writeShort(stringLen);
  236. pos+=2;
  237. if (stringLen > 0) {
  238. out.writeByte(field_3_unicode_flag ? 0x01 : 0x00);
  239. pos+=1;
  240. if (field_3_unicode_flag) {
  241. StringUtil.putUnicodeLE(field_4_ole_classname, out);
  242. pos += stringLen * 2;
  243. } else {
  244. StringUtil.putCompressedUnicode(field_4_ole_classname, out);
  245. pos += stringLen;
  246. }
  247. }
  248. }
  249. // pad to next 2-byte boundary (requires 0 or 1 bytes)
  250. switch(idOffset - (pos - 6)) { // 6 for 3 shorts: sid, dataSize, idOffset
  251. case 1:
  252. out.writeByte(field_4_unknownByte == null ? 0x00 : field_4_unknownByte.intValue());
  253. break;
  254. case 0:
  255. break;
  256. default:
  257. throw new IllegalStateException("Bad padding calculation (" + idOffset + ", " + pos + ")");
  258. }
  259. if (field_5_stream_id != null) {
  260. out.writeInt(field_5_stream_id);
  261. }
  262. out.write(field_6_unknown);
  263. }
  264. /**
  265. * Gets the stream ID containing the actual data. The data itself
  266. * can be found under a top-level directory entry in the OLE2 filesystem
  267. * under the name "MBD<var>xxxxxxxx</var>" where <var>xxxxxxxx</var> is
  268. * this ID converted into hex (in big endian order, funnily enough.)
  269. *
  270. * @return the data stream ID. Possibly <code>null</code>
  271. */
  272. public Integer getStreamId() {
  273. return field_5_stream_id;
  274. }
  275. public String getOLEClassName() {
  276. return field_4_ole_classname;
  277. }
  278. public byte[] getObjectData() {
  279. return field_6_unknown;
  280. }
  281. @Override
  282. public EmbeddedObjectRefSubRecord copy() {
  283. return new EmbeddedObjectRefSubRecord(this);
  284. }
  285. public void setUnknownFormulaData(byte[] formularData) {
  286. field_2_unknownFormulaData = formularData;
  287. }
  288. public void setOleClassname(String oleClassname) {
  289. field_4_ole_classname = oleClassname;
  290. }
  291. public void setStorageId(int storageId) {
  292. field_5_stream_id = storageId;
  293. }
  294. @Override
  295. public SubRecordTypes getGenericRecordType() {
  296. return SubRecordTypes.EMBEDDED_OBJECT_REF;
  297. }
  298. @Override
  299. public Map<String, Supplier<?>> getGenericProperties() {
  300. return GenericRecordUtil.getGenericProperties(
  301. "f2unknown", () -> field_1_unknown_int,
  302. "f3unknown", () -> field_2_unknownFormulaData,
  303. "formula", () -> field_2_refPtg,
  304. "unicodeFlag", () -> field_3_unicode_flag,
  305. "oleClassname", () -> field_4_ole_classname,
  306. "f4unknown", () -> field_4_unknownByte,
  307. "streamId", () -> field_5_stream_id,
  308. "f7unknown", () -> field_6_unknown
  309. );
  310. }
  311. }