You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

HyperlinkRecord.java 20KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hssf.record;
  16. import static org.apache.logging.log4j.util.Unbox.box;
  17. import static org.apache.poi.hpsf.ClassIDPredefined.FILE_MONIKER;
  18. import static org.apache.poi.hpsf.ClassIDPredefined.STD_MONIKER;
  19. import static org.apache.poi.hpsf.ClassIDPredefined.URL_MONIKER;
  20. import static org.apache.poi.util.GenericRecordUtil.getBitsAsString;
  21. import static org.apache.poi.util.HexDump.toHex;
  22. import java.util.Map;
  23. import java.util.function.Supplier;
  24. import org.apache.logging.log4j.LogManager;
  25. import org.apache.logging.log4j.Logger;
  26. import org.apache.poi.hpsf.ClassID;
  27. import org.apache.poi.hpsf.ClassIDPredefined;
  28. import org.apache.poi.ss.util.CellRangeAddress;
  29. import org.apache.poi.util.GenericRecordUtil;
  30. import org.apache.poi.util.HexRead;
  31. import org.apache.poi.util.IOUtils;
  32. import org.apache.poi.util.LittleEndianInput;
  33. import org.apache.poi.util.LittleEndianOutput;
  34. import org.apache.poi.util.RecordFormatException;
  35. import org.apache.poi.util.StringUtil;
  36. /**
  37. * The <code>HyperlinkRecord</code> (0x01B8) wraps an HLINK-record
  38. * from the Excel-97 format.
  39. * Supports only external links for now (eg http://)
  40. */
  41. public final class HyperlinkRecord extends StandardRecord {
  42. public static final short sid = 0x01B8;
  43. private static final Logger LOG = LogManager.getLogger(HyperlinkRecord.class);
  44. //arbitrarily selected; may need to increase
  45. private static final int MAX_RECORD_LENGTH = 100_000;
  46. /*
  47. * Link flags
  48. */
  49. static final int HLINK_URL = 0x01; // File link or URL.
  50. static final int HLINK_ABS = 0x02; // Absolute path.
  51. static final int HLINK_LABEL = 0x14; // Has label/description.
  52. /** Place in worksheet. If set, the {@link #_textMark} field will be present */
  53. static final int HLINK_PLACE = 0x08;
  54. private static final int HLINK_TARGET_FRAME = 0x80; // has 'target frame'
  55. private static final int HLINK_UNC_PATH = 0x100; // has UNC path
  56. /** expected Tail of a URL link */
  57. private static final byte[] URL_TAIL = HexRead.readFromString("79 58 81 F4 3B 1D 7F 48 AF 2C 82 5D C4 85 27 63 00 00 00 00 A5 AB 00 00");
  58. /** expected Tail of a file link */
  59. private static final byte[] FILE_TAIL = HexRead.readFromString("FF FF AD DE 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00");
  60. private static final int TAIL_SIZE = FILE_TAIL.length;
  61. /** cell range of this hyperlink */
  62. private CellRangeAddress _range;
  63. /** 16-byte GUID */
  64. private ClassID _guid;
  65. /** Some sort of options for file links. */
  66. private int _fileOpts;
  67. /** Link options. Can include any of HLINK_* flags. */
  68. private int _linkOpts;
  69. /** Test label */
  70. private String _label;
  71. private String _targetFrame;
  72. /** Moniker. Makes sense only for URL and file links */
  73. private ClassID _moniker;
  74. /** in 8:3 DOS format No Unicode string header,
  75. * always 8-bit characters, zero-terminated */
  76. private String _shortFilename;
  77. /** Link */
  78. private String _address;
  79. /**
  80. * Text describing a place in document. In Excel UI, this is appended to the
  81. * address, (after a '#' delimiter).<br>
  82. * This field is optional. If present, the {@link #HLINK_PLACE} must be set.
  83. */
  84. private String _textMark;
  85. private byte[] _uninterpretedTail;
  86. /**
  87. * Create a new hyperlink
  88. */
  89. public HyperlinkRecord() {}
  90. public HyperlinkRecord(HyperlinkRecord other) {
  91. super(other);
  92. _range = (other._range == null) ? null : other._range.copy();
  93. _guid = (other._guid == null) ? null : other._guid.copy();
  94. _fileOpts = other._fileOpts;
  95. _linkOpts = other._linkOpts;
  96. _label = other._label;
  97. _targetFrame = other._targetFrame;
  98. _moniker = (other._moniker == null) ? null : other._moniker.copy();
  99. _shortFilename = other._shortFilename;
  100. _address = other._address;
  101. _textMark = other._textMark;
  102. _uninterpretedTail = (other._uninterpretedTail == null) ? null : other._uninterpretedTail.clone();
  103. }
  104. /**
  105. * @return the 0-based column of the first cell that contains this hyperlink
  106. */
  107. public int getFirstColumn() {
  108. return _range.getFirstColumn();
  109. }
  110. /**
  111. * Set the first column (zero-based) of the range that contains this hyperlink
  112. *
  113. * @param firstCol the first column (zero-based)
  114. */
  115. public void setFirstColumn(int firstCol) {
  116. _range.setFirstColumn(firstCol);
  117. }
  118. /**
  119. * @return the 0-based column of the last cell that contains this hyperlink
  120. */
  121. public int getLastColumn() {
  122. return _range.getLastColumn();
  123. }
  124. /**
  125. * Set the last column (zero-based) of the range that contains this hyperlink
  126. *
  127. * @param lastCol the last column (zero-based)
  128. */
  129. public void setLastColumn(int lastCol) {
  130. _range.setLastColumn(lastCol);
  131. }
  132. /**
  133. * @return the 0-based row of the first cell that contains this hyperlink
  134. */
  135. public int getFirstRow() {
  136. return _range.getFirstRow();
  137. }
  138. /**
  139. * Set the first row (zero-based) of the range that contains this hyperlink
  140. *
  141. * @param firstRow the first row (zero-based)
  142. */
  143. public void setFirstRow(int firstRow) {
  144. _range.setFirstRow(firstRow);
  145. }
  146. /**
  147. * @return the 0-based row of the last cell that contains this hyperlink
  148. */
  149. public int getLastRow() {
  150. return _range.getLastRow();
  151. }
  152. /**
  153. * Set the last row (zero-based) of the range that contains this hyperlink
  154. *
  155. * @param lastRow the last row (zero-based)
  156. */
  157. public void setLastRow(int lastRow) {
  158. _range.setLastRow(lastRow);
  159. }
  160. /**
  161. * @return 16-byte guid identifier Seems to always equal {@link ClassIDPredefined#STD_MONIKER}
  162. */
  163. ClassID getGuid() {
  164. return _guid;
  165. }
  166. /**
  167. * @return 16-byte moniker
  168. */
  169. ClassID getMoniker()
  170. {
  171. return _moniker;
  172. }
  173. private static String cleanString(String s) {
  174. if (s == null) {
  175. return null;
  176. }
  177. int idx = s.indexOf('\u0000');
  178. if (idx < 0) {
  179. return s;
  180. }
  181. return s.substring(0, idx);
  182. }
  183. private static String appendNullTerm(String s) {
  184. if (s == null) {
  185. return null;
  186. }
  187. return s + '\u0000';
  188. }
  189. /**
  190. * Return text label for this hyperlink
  191. *
  192. * @return text to display
  193. */
  194. public String getLabel() {
  195. return cleanString(_label);
  196. }
  197. /**
  198. * Sets text label for this hyperlink
  199. *
  200. * @param label text label for this hyperlink
  201. */
  202. public void setLabel(String label) {
  203. _label = appendNullTerm(label);
  204. }
  205. public String getTargetFrame() {
  206. return cleanString(_targetFrame);
  207. }
  208. /**
  209. * Hyperlink address. Depending on the hyperlink type it can be URL, e-mail, path to a file, etc.
  210. *
  211. * @return the address of this hyperlink
  212. */
  213. public String getAddress() {
  214. if ((_linkOpts & HLINK_URL) != 0 && FILE_MONIKER.equals(_moniker)) {
  215. return cleanString(_address != null ? _address : _shortFilename);
  216. } else if((_linkOpts & HLINK_PLACE) != 0) {
  217. return cleanString(_textMark);
  218. } else {
  219. return cleanString(_address);
  220. }
  221. }
  222. /**
  223. * Hyperlink address. Depending on the hyperlink type it can be URL, e-mail, path to a file, etc.
  224. *
  225. * @param address the address of this hyperlink
  226. */
  227. public void setAddress(String address) {
  228. if ((_linkOpts & HLINK_URL) != 0 && FILE_MONIKER.equals(_moniker)) {
  229. _shortFilename = appendNullTerm(address);
  230. } else if((_linkOpts & HLINK_PLACE) != 0) {
  231. _textMark = appendNullTerm(address);
  232. } else {
  233. _address = appendNullTerm(address);
  234. }
  235. }
  236. public String getShortFilename() {
  237. return cleanString(_shortFilename);
  238. }
  239. public void setShortFilename(String shortFilename) {
  240. _shortFilename = appendNullTerm(shortFilename);
  241. }
  242. public String getTextMark() {
  243. return cleanString(_textMark);
  244. }
  245. public void setTextMark(String textMark) {
  246. _textMark = appendNullTerm(textMark);
  247. }
  248. /**
  249. * Link options. Must be a combination of HLINK_* constants.
  250. * For testing only
  251. *
  252. * @return Link options
  253. */
  254. int getLinkOptions(){
  255. return _linkOpts;
  256. }
  257. /**
  258. * @return Label options
  259. */
  260. public int getLabelOptions(){
  261. return 2; // always 2
  262. }
  263. /**
  264. * @return Options for a file link
  265. */
  266. public int getFileOptions(){
  267. return _fileOpts;
  268. }
  269. public HyperlinkRecord(RecordInputStream in) {
  270. _range = new CellRangeAddress(in);
  271. _guid = new ClassID(in);
  272. /*
  273. * streamVersion (4 bytes): An unsigned integer that specifies the version number
  274. * of the serialization implementation used to save this structure. This value MUST equal 2.
  275. */
  276. int streamVersion = in.readInt();
  277. if (streamVersion != 0x00000002) {
  278. throw new RecordFormatException("Stream Version must be 0x2 but found " + streamVersion);
  279. }
  280. _linkOpts = in.readInt();
  281. if ((_linkOpts & HLINK_LABEL) != 0){
  282. int label_len = in.readInt();
  283. _label = in.readUnicodeLEString(label_len);
  284. }
  285. if ((_linkOpts & HLINK_TARGET_FRAME) != 0){
  286. int len = in.readInt();
  287. _targetFrame = in.readUnicodeLEString(len);
  288. }
  289. if ((_linkOpts & HLINK_URL) != 0 && (_linkOpts & HLINK_UNC_PATH) != 0) {
  290. _moniker = null;
  291. int nChars = in.readInt();
  292. _address = in.readUnicodeLEString(nChars);
  293. }
  294. if ((_linkOpts & HLINK_URL) != 0 && (_linkOpts & HLINK_UNC_PATH) == 0) {
  295. _moniker = new ClassID(in);
  296. if(URL_MONIKER.equals(_moniker)){
  297. int length = in.readInt();
  298. /*
  299. * The value of <code>length<code> be either the byte size of the url field
  300. * (including the terminating NULL character) or the byte size of the url field plus 24.
  301. * If the value of this field is set to the byte size of the url field,
  302. * then the tail bytes fields are not present.
  303. */
  304. int remaining = in.remaining();
  305. if (length == remaining) {
  306. int nChars = length/2;
  307. _address = in.readUnicodeLEString(nChars);
  308. } else {
  309. int nChars = (length - TAIL_SIZE)/2;
  310. _address = in.readUnicodeLEString(nChars);
  311. /*
  312. * TODO: make sense of the remaining bytes
  313. * According to the spec they consist of:
  314. * 1. 16-byte GUID: This field MUST equal
  315. * {0xF4815879, 0x1D3B, 0x487F, 0xAF, 0x2C, 0x82, 0x5D, 0xC4, 0x85, 0x27, 0x63}
  316. * 2. Serial version, this field MUST equal 0 if present.
  317. * 3. URI Flags
  318. */
  319. _uninterpretedTail = readTail(URL_TAIL, in);
  320. }
  321. } else if (FILE_MONIKER.equals(_moniker)) {
  322. _fileOpts = in.readShort();
  323. int len = in.readInt();
  324. _shortFilename = StringUtil.readCompressedUnicode(in, len);
  325. _uninterpretedTail = readTail(FILE_TAIL, in);
  326. int size = in.readInt();
  327. if (size > 0) {
  328. int charDataSize = in.readInt();
  329. //From the spec: An optional unsigned integer that MUST be 3 if present
  330. // but some files has 4
  331. /*int usKeyValue = */ in.readUShort();
  332. _address = StringUtil.readUnicodeLE(in, charDataSize/2);
  333. } else {
  334. _address = null;
  335. }
  336. } else if (STD_MONIKER.equals(_moniker)) {
  337. _fileOpts = in.readShort();
  338. int len = in.readInt();
  339. byte[] path_bytes = IOUtils.safelyAllocate(len, MAX_RECORD_LENGTH);
  340. in.readFully(path_bytes);
  341. _address = new String(path_bytes, StringUtil.UTF8);
  342. }
  343. }
  344. if((_linkOpts & HLINK_PLACE) != 0) {
  345. int len = in.readInt();
  346. _textMark = in.readUnicodeLEString(len);
  347. }
  348. if (in.remaining() > 0) {
  349. LOG.atWarn().log("Hyperlink data remains: {} : {}", box(in.remaining()), toHex(in.readRemainder()));
  350. }
  351. }
  352. @Override
  353. public void serialize(LittleEndianOutput out) {
  354. _range.serialize(out);
  355. _guid.write(out);
  356. out.writeInt(0x00000002); // TODO const
  357. out.writeInt(_linkOpts);
  358. if ((_linkOpts & HLINK_LABEL) != 0){
  359. out.writeInt(_label.length());
  360. StringUtil.putUnicodeLE(_label, out);
  361. }
  362. if ((_linkOpts & HLINK_TARGET_FRAME) != 0){
  363. out.writeInt(_targetFrame.length());
  364. StringUtil.putUnicodeLE(_targetFrame, out);
  365. }
  366. if ((_linkOpts & HLINK_URL) != 0 && (_linkOpts & HLINK_UNC_PATH) != 0) {
  367. out.writeInt(_address.length());
  368. StringUtil.putUnicodeLE(_address, out);
  369. }
  370. if ((_linkOpts & HLINK_URL) != 0 && (_linkOpts & HLINK_UNC_PATH) == 0) {
  371. _moniker.write(out);
  372. if(URL_MONIKER.equals(_moniker)){
  373. if (_uninterpretedTail == null) {
  374. out.writeInt(_address.length()*2);
  375. StringUtil.putUnicodeLE(_address, out);
  376. } else {
  377. out.writeInt(_address.length()*2 + TAIL_SIZE);
  378. StringUtil.putUnicodeLE(_address, out);
  379. writeTail(_uninterpretedTail, out);
  380. }
  381. } else if (FILE_MONIKER.equals(_moniker)){
  382. out.writeShort(_fileOpts);
  383. out.writeInt(_shortFilename.length());
  384. StringUtil.putCompressedUnicode(_shortFilename, out);
  385. writeTail(_uninterpretedTail, out);
  386. if (_address == null) {
  387. out.writeInt(0);
  388. } else {
  389. int addrLen = _address.length() * 2;
  390. out.writeInt(addrLen + 6);
  391. out.writeInt(addrLen);
  392. out.writeShort(0x0003); // TODO const
  393. StringUtil.putUnicodeLE(_address, out);
  394. }
  395. }
  396. }
  397. if((_linkOpts & HLINK_PLACE) != 0){
  398. out.writeInt(_textMark.length());
  399. StringUtil.putUnicodeLE(_textMark, out);
  400. }
  401. }
  402. @Override
  403. protected int getDataSize() {
  404. int size = 0;
  405. size += 2 + 2 + 2 + 2; //rwFirst, rwLast, colFirst, colLast
  406. size += ClassID.LENGTH;
  407. size += 4; //label_opts
  408. size += 4; //link_opts
  409. if ((_linkOpts & HLINK_LABEL) != 0){
  410. size += 4; //link length
  411. size += _label.length()*2;
  412. }
  413. if ((_linkOpts & HLINK_TARGET_FRAME) != 0){
  414. size += 4; // int nChars
  415. size += _targetFrame.length()*2;
  416. }
  417. if ((_linkOpts & HLINK_URL) != 0 && (_linkOpts & HLINK_UNC_PATH) != 0) {
  418. size += 4; // int nChars
  419. size += _address.length()*2;
  420. }
  421. if ((_linkOpts & HLINK_URL) != 0 && (_linkOpts & HLINK_UNC_PATH) == 0) {
  422. size += ClassID.LENGTH;
  423. if(URL_MONIKER.equals(_moniker)){
  424. size += 4; //address length
  425. size += _address.length()*2;
  426. if (_uninterpretedTail != null) {
  427. size += TAIL_SIZE;
  428. }
  429. } else if (FILE_MONIKER.equals(_moniker)){
  430. size += 2; //file_opts
  431. size += 4; //address length
  432. size += _shortFilename.length();
  433. size += TAIL_SIZE;
  434. size += 4;
  435. if (_address != null) {
  436. size += 6;
  437. size += _address.length() * 2;
  438. }
  439. }
  440. }
  441. if((_linkOpts & HLINK_PLACE) != 0){
  442. size += 4; //address length
  443. size += _textMark.length()*2;
  444. }
  445. return size;
  446. }
  447. private static byte[] readTail(byte[] expectedTail, LittleEndianInput in) {
  448. byte[] result = new byte[TAIL_SIZE];
  449. in.readFully(result);
  450. return result;
  451. }
  452. private static void writeTail(byte[] tail, LittleEndianOutput out) {
  453. out.write(tail);
  454. }
  455. @Override
  456. public short getSid() {
  457. return HyperlinkRecord.sid;
  458. }
  459. /**
  460. * Based on the link options, is this a url?
  461. *
  462. * @return true, if this is a url link
  463. */
  464. @SuppressWarnings("unused")
  465. public boolean isUrlLink() {
  466. return (_linkOpts & HLINK_URL) > 0
  467. && (_linkOpts & HLINK_ABS) > 0;
  468. }
  469. /**
  470. * Based on the link options, is this a file?
  471. *
  472. * @return true, if this is a file link
  473. */
  474. public boolean isFileLink() {
  475. return (_linkOpts & HLINK_URL) > 0
  476. && (_linkOpts & HLINK_ABS) == 0;
  477. }
  478. /**
  479. * Based on the link options, is this a document?
  480. *
  481. * @return true, if this is a docment link
  482. */
  483. public boolean isDocumentLink() {
  484. return (_linkOpts & HLINK_PLACE) > 0;
  485. }
  486. /**
  487. * Initialize a new url link
  488. */
  489. public void newUrlLink() {
  490. _range = new CellRangeAddress(0, 0, 0, 0);
  491. _guid = STD_MONIKER.getClassID();
  492. _linkOpts = HLINK_URL | HLINK_ABS | HLINK_LABEL;
  493. setLabel("");
  494. _moniker = URL_MONIKER.getClassID();
  495. setAddress("");
  496. _uninterpretedTail = URL_TAIL;
  497. }
  498. /**
  499. * Initialize a new file link
  500. */
  501. public void newFileLink() {
  502. _range = new CellRangeAddress(0, 0, 0, 0);
  503. _guid = STD_MONIKER.getClassID();
  504. _linkOpts = HLINK_URL | HLINK_LABEL;
  505. _fileOpts = 0;
  506. setLabel("");
  507. _moniker = FILE_MONIKER.getClassID();
  508. setAddress(null);
  509. setShortFilename("");
  510. _uninterpretedTail = FILE_TAIL;
  511. }
  512. /**
  513. * Initialize a new document link
  514. */
  515. public void newDocumentLink() {
  516. _range = new CellRangeAddress(0, 0, 0, 0);
  517. _guid = STD_MONIKER.getClassID();
  518. _linkOpts = HLINK_LABEL | HLINK_PLACE;
  519. setLabel("");
  520. _moniker = FILE_MONIKER.getClassID();
  521. setAddress("");
  522. setTextMark("");
  523. }
  524. @Override
  525. public HyperlinkRecord copy() {
  526. return new HyperlinkRecord(this);
  527. }
  528. @Override
  529. public HSSFRecordTypes getGenericRecordType() {
  530. return HSSFRecordTypes.HYPERLINK;
  531. }
  532. @Override
  533. public Map<String, Supplier<?>> getGenericProperties() {
  534. return GenericRecordUtil.getGenericProperties(
  535. "range", () -> _range,
  536. "guid", this::getGuid,
  537. "linkOpts", () -> getBitsAsString(this::getLinkOptions,
  538. new int[]{HLINK_URL,HLINK_ABS,HLINK_PLACE,HLINK_LABEL,HLINK_TARGET_FRAME,HLINK_UNC_PATH},
  539. new String[]{"URL","ABS","PLACE","LABEL","TARGET_FRAME","UNC_PATH"}),
  540. "label", this::getLabel,
  541. "targetFrame", this::getTargetFrame,
  542. "moniker", this::getMoniker,
  543. "textMark", this::getTextMark,
  544. "address", this::getAddress
  545. );
  546. }
  547. }