123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.hssf.record.common;
-
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.Iterator;
- import java.util.List;
- import java.util.Map;
- import java.util.Objects;
- import java.util.function.Supplier;
- import java.util.stream.Collectors;
-
- import org.apache.poi.common.Duplicatable;
- import org.apache.poi.common.usermodel.GenericRecord;
- import org.apache.poi.hssf.record.RecordInputStream;
- import org.apache.poi.hssf.record.cont.ContinuableRecordInput;
- import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
- import org.apache.poi.util.BitField;
- import org.apache.poi.util.BitFieldFactory;
- import org.apache.poi.util.GenericRecordUtil;
- import org.apache.poi.util.POILogFactory;
- import org.apache.poi.util.POILogger;
-
- /**
- * Unicode String - just standard fields that are in several records.
- * It is considered more desirable than repeating it in all of them.<p>
- * This is often called an XLUnicodeRichExtendedString in MS documentation.<p>
- */
- public class UnicodeString implements Comparable<UnicodeString>, Duplicatable, GenericRecord {
- private static final POILogger LOG = POILogFactory.getLogger(UnicodeString.class);
-
- private static final BitField highByte = BitFieldFactory.getInstance(0x1);
- // 0x2 is reserved
- private static final BitField extBit = BitFieldFactory.getInstance(0x4);
- private static final BitField richText = BitFieldFactory.getInstance(0x8);
-
- private short field_1_charCount;
- private byte field_2_optionflags;
- private String field_3_string;
- private List<FormatRun> field_4_format_runs;
- private ExtRst field_5_ext_rst;
-
/**
 * Copy constructor. The scalar fields are copied directly, every format run
 * is passed through the {@code FormatRun} copy constructor, and the extended
 * data is duplicated via its {@code copy()} method.
 *
 * @param other the instance to copy from
 */
private UnicodeString(UnicodeString other) {
    field_1_charCount = other.field_1_charCount;
    field_2_optionflags = other.field_2_optionflags;
    field_3_string = other.field_3_string;

    if (other.field_4_format_runs == null) {
        field_4_format_runs = null;
    } else {
        // Build the copy completely before publishing it to the field.
        List<FormatRun> copiedRuns = new ArrayList<>();
        for (FormatRun sourceRun : other.field_4_format_runs) {
            copiedRuns.add(new FormatRun(sourceRun));
        }
        field_4_format_runs = copiedRuns;
    }

    if (other.field_5_ext_rst == null) {
        field_5_ext_rst = null;
    } else {
        field_5_ext_rst = other.field_5_ext_rst.copy();
    }
}
-
/**
 * Creates an instance from the given text by delegating to
 * {@link #setString(String)}, which also derives the character count and
 * the 8-bit/16-bit option flag.
 *
 * @param str the text to hold
 */
public UnicodeString(String str) {
    this.setString(str);
}
-
- /**
- * construct a unicode string record and fill its fields, ID is ignored
- * @param in the RecordInputstream to read the record from
- */
- public UnicodeString(RecordInputStream in) {
- field_1_charCount = in.readShort();
- field_2_optionflags = in.readByte();
-
- int runCount = 0;
- int extensionLength = 0;
- //Read the number of rich runs if rich text.
- if (isRichText()) {
- runCount = in.readShort();
- }
- //Read the size of extended data if present.
- if (isExtendedText()) {
- extensionLength = in.readInt();
- }
-
- boolean isCompressed = ((field_2_optionflags & 1) == 0);
- int cc = getCharCount();
- field_3_string = (isCompressed) ? in.readCompressedUnicode(cc) : in.readUnicodeLEString(cc);
-
- if (isRichText() && (runCount > 0)) {
- field_4_format_runs = new ArrayList<>(runCount);
- for (int i=0;i<runCount;i++) {
- field_4_format_runs.add(new FormatRun(in));
- }
- }
-
- if (isExtendedText() && (extensionLength > 0)) {
- field_5_ext_rst = new ExtRst(new ContinuableRecordInput(in), extensionLength);
- if(field_5_ext_rst.getDataSize()+4 != extensionLength) {
- LOG.log(POILogger.WARN, "ExtRst was supposed to be " + extensionLength + " bytes long, but seems to actually be " + (field_5_ext_rst.getDataSize() + 4));
- }
- }
- }
-
- public int hashCode() {
- return Objects.hash(field_1_charCount, field_3_string);
- }
-
- /**
- * Our handling of equals is inconsistent with compareTo. The trouble is that, because we don't truly understand
- * rich text fields yet, it's difficult to make a sound comparison.
- *
- * @param o The object to compare.
- * @return true if the object is actually equal.
- */
- public boolean equals(Object o)
- {
- if (!(o instanceof UnicodeString)) {
- return false;
- }
- UnicodeString other = (UnicodeString) o;
-
- //OK lets do this in stages to return a quickly, first check the actual string
- if (field_1_charCount != other.field_1_charCount
- || field_2_optionflags != other.field_2_optionflags
- || !field_3_string.equals(other.field_3_string)) {
- return false;
- }
-
- //OK string appears to be equal but now lets compare formatting runs
- if (field_4_format_runs == null) {
- // Strings are equal, and there are not formatting runs.
- return (other.field_4_format_runs == null);
- } else if (other.field_4_format_runs == null) {
- // Strings are equal, but one or the other has formatting runs
- return false;
- }
-
- //Strings are equal, so now compare formatting runs.
- int size = field_4_format_runs.size();
- if (size != other.field_4_format_runs.size()) {
- return false;
- }
-
- for (int i=0;i<size;i++) {
- FormatRun run1 = field_4_format_runs.get(i);
- FormatRun run2 = other.field_4_format_runs.get(i);
-
- if (!run1.equals(run2)) {
- return false;
- }
- }
-
- // Well the format runs are equal as well!, better check the ExtRst data
- if (field_5_ext_rst == null) {
- return (other.field_5_ext_rst == null);
- } else if (other.field_5_ext_rst == null) {
- return false;
- }
-
- return field_5_ext_rst.equals(other.field_5_ext_rst);
- }
-
-
-
- /**
- * get the number of characters in the string,
- * as an un-wrapped int
- *
- * @return number of characters
- */
- public int getCharCount() {
- if(field_1_charCount < 0) {
- return field_1_charCount + 65536;
- }
- return field_1_charCount;
- }
-
- /**
- * get the number of characters in the string,
- * wrapped as needed to fit within a short
- *
- * @return number of characters
- */
- public short getCharCountShort() {
- return field_1_charCount;
- }
-
- /**
- * set the number of characters in the string
- * @param cc - number of characters
- */
-
- public void setCharCount(short cc)
- {
- field_1_charCount = cc;
- }
-
- /**
- * get the option flags which among other things return if this is a 16-bit or
- * 8 bit string
- *
- * @return optionflags bitmask
- *
- */
-
- public byte getOptionFlags()
- {
- return field_2_optionflags;
- }
-
- /**
- * set the option flags which among other things return if this is a 16-bit or
- * 8 bit string
- *
- * @param of optionflags bitmask
- *
- */
-
- public void setOptionFlags(byte of)
- {
- field_2_optionflags = of;
- }
-
- /**
- * @return the actual string this contains as a java String object
- */
- public String getString()
- {
- return field_3_string;
- }
-
- /**
- * set the actual string this contains
- * @param string the text
- */
-
- public void setString(String string)
- {
- field_3_string = string;
- setCharCount((short)field_3_string.length());
- // scan for characters greater than 255 ... if any are
- // present, we have to use 16-bit encoding. Otherwise, we
- // can use 8-bit encoding
- boolean useUTF16 = false;
- int strlen = string.length();
-
- for ( int j = 0; j < strlen; j++ ) {
- if ( string.charAt( j ) > 255 ) {
- useUTF16 = true;
- break;
- }
- }
- if (useUTF16) {
- //Set the uncompressed bit
- field_2_optionflags = highByte.setByte(field_2_optionflags);
- } else {
- field_2_optionflags = highByte.clearByte(field_2_optionflags);
- }
- }
-
- public int getFormatRunCount() {
- return (field_4_format_runs == null) ? 0 : field_4_format_runs.size();
- }
-
- public FormatRun getFormatRun(int index) {
- if (field_4_format_runs == null) {
- return null;
- }
- if (index < 0 || index >= field_4_format_runs.size()) {
- return null;
- }
- return field_4_format_runs.get(index);
- }
-
- private int findFormatRunAt(int characterPos) {
- int size = field_4_format_runs.size();
- for (int i=0;i<size;i++) {
- FormatRun r = field_4_format_runs.get(i);
- if (r._character == characterPos) {
- return i;
- } else if (r._character > characterPos) {
- return -1;
- }
- }
- return -1;
- }
-
- /** Adds a font run to the formatted string.
- *
- * If a font run exists at the current character location, then it is
- * replaced with the font run to be added.
- */
- public void addFormatRun(FormatRun r) {
- if (field_4_format_runs == null) {
- field_4_format_runs = new ArrayList<>();
- }
-
- int index = findFormatRunAt(r._character);
- if (index != -1) {
- field_4_format_runs.remove(index);
- }
-
- field_4_format_runs.add(r);
- //Need to sort the font runs to ensure that the font runs appear in
- //character order
- Collections.sort(field_4_format_runs);
-
- //Make sure that we now say that we are a rich string
- field_2_optionflags = richText.setByte(field_2_optionflags);
- }
-
- public Iterator<FormatRun> formatIterator() {
- if (field_4_format_runs != null) {
- return field_4_format_runs.iterator();
- }
- return null;
- }
-
- public void removeFormatRun(FormatRun r) {
- field_4_format_runs.remove(r);
- if (field_4_format_runs.size() == 0) {
- field_4_format_runs = null;
- field_2_optionflags = richText.clearByte(field_2_optionflags);
- }
- }
-
- public void clearFormatting() {
- field_4_format_runs = null;
- field_2_optionflags = richText.clearByte(field_2_optionflags);
- }
-
-
- public ExtRst getExtendedRst() {
- return this.field_5_ext_rst;
- }
- void setExtendedRst(ExtRst ext_rst) {
- if (ext_rst != null) {
- field_2_optionflags = extBit.setByte(field_2_optionflags);
- } else {
- field_2_optionflags = extBit.clearByte(field_2_optionflags);
- }
- this.field_5_ext_rst = ext_rst;
- }
-
-
- /**
- * Swaps all use in the string of one font index
- * for use of a different font index.
- * Normally only called when fonts have been
- * removed / re-ordered
- */
- public void swapFontUse(short oldFontIndex, short newFontIndex) {
- if (field_4_format_runs != null) {
- for (FormatRun run : field_4_format_runs) {
- if(run._fontIndex == oldFontIndex) {
- run._fontIndex = newFontIndex;
- }
- }
- }
- }
-
- /**
- * unlike the real records we return the same as "getString()" rather than debug info
- * @see #getDebugInfo()
- * @return String value of the record
- */
-
- public String toString()
- {
- return getString();
- }
-
- /**
- * return a character representation of the fields of this record
- *
- *
- * @return String of output for biffviewer etc.
- *
- */
- public String getDebugInfo()
- {
- StringBuilder buffer = new StringBuilder();
-
- buffer.append("[UNICODESTRING]\n");
- buffer.append(" .charcount = ")
- .append(Integer.toHexString(getCharCount())).append("\n");
- buffer.append(" .optionflags = ")
- .append(Integer.toHexString(getOptionFlags())).append("\n");
- buffer.append(" .string = ").append(getString()).append("\n");
- if (field_4_format_runs != null) {
- for (int i = 0; i < field_4_format_runs.size();i++) {
- FormatRun r = field_4_format_runs.get(i);
- buffer.append(" .format_run").append(i).append(" = ").append(r).append("\n");
- }
- }
- if (field_5_ext_rst != null) {
- buffer.append(" .field_5_ext_rst = ").append("\n");
- buffer.append(field_5_ext_rst).append("\n");
- }
- buffer.append("[/UNICODESTRING]\n");
- return buffer.toString();
- }
-
- /**
- * Serialises out the String. There are special rules
- * about where we can and can't split onto
- * Continue records.
- */
- public void serialize(ContinuableRecordOutput out) {
- int numberOfRichTextRuns = 0;
- int extendedDataSize = 0;
- if (isRichText() && field_4_format_runs != null) {
- numberOfRichTextRuns = field_4_format_runs.size();
- }
- if (isExtendedText() && field_5_ext_rst != null) {
- extendedDataSize = 4 + field_5_ext_rst.getDataSize();
- }
-
- // Serialise the bulk of the String
- // The writeString handles tricky continue stuff for us
- out.writeString(field_3_string, numberOfRichTextRuns, extendedDataSize);
-
- if (numberOfRichTextRuns > 0) {
-
- //This will ensure that a run does not split a continue
- for (int i=0;i<numberOfRichTextRuns;i++) {
- if (out.getAvailableSpace() < 4) {
- out.writeContinue();
- }
- FormatRun r = field_4_format_runs.get(i);
- r.serialize(out);
- }
- }
-
- if (extendedDataSize > 0 && field_5_ext_rst != null) {
- field_5_ext_rst.serialize(out);
- }
- }
-
- public int compareTo(UnicodeString str) {
-
- int result = getString().compareTo(str.getString());
-
- //As per the equals method lets do this in stages
- if (result != 0) {
- return result;
- }
-
- //OK string appears to be equal but now lets compare formatting runs
- if (field_4_format_runs == null) {
- //Strings are equal, and there are no formatting runs. -> 0
- //Strings are equal, but one or the other has formatting runs -> 1
- return (str.field_4_format_runs == null) ? 0 : 1;
- } else if (str.field_4_format_runs == null) {
- //Strings are equal, but one or the other has formatting runs
- return -1;
- }
-
- //Strings are equal, so now compare formatting runs.
- int size = field_4_format_runs.size();
- if (size != str.field_4_format_runs.size()) {
- return size - str.field_4_format_runs.size();
- }
-
- for (int i=0;i<size;i++) {
- FormatRun run1 = field_4_format_runs.get(i);
- FormatRun run2 = str.field_4_format_runs.get(i);
-
- result = run1.compareTo(run2);
- if (result != 0) {
- return result;
- }
- }
-
- //Well the format runs are equal as well!, better check the ExtRst data
- if (field_5_ext_rst == null) {
- return (str.field_5_ext_rst == null) ? 0 : 1;
- } else if (str.field_5_ext_rst == null) {
- return -1;
- } else {
- return field_5_ext_rst.compareTo(str.field_5_ext_rst);
- }
- }
-
- private boolean isRichText() {
- return richText.isSet(getOptionFlags());
- }
-
- private boolean isExtendedText() {
- return extBit.isSet(getOptionFlags());
- }
-
- @Override
- public UnicodeString copy() {
- return new UnicodeString(this);
- }
-
- @Override
- public Map<String, Supplier<?>> getGenericProperties() {
- return GenericRecordUtil.getGenericProperties(
- "charCount", this::getCharCount,
- "optionFlags", this::getOptionFlags,
- "string", this::getString,
- "formatRuns", () -> field_4_format_runs,
- "extendedRst", this::getExtendedRst
- );
- }
- }
|