diff options
Diffstat (limited to 'src/java/org/apache/poi/common/usermodel')
5 files changed, 529 insertions, 0 deletions
diff --git a/src/java/org/apache/poi/common/usermodel/fonts/FontCharset.java b/src/java/org/apache/poi/common/usermodel/fonts/FontCharset.java new file mode 100644 index 0000000000..aeeca9284c --- /dev/null +++ b/src/java/org/apache/poi/common/usermodel/fonts/FontCharset.java @@ -0,0 +1,124 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.common.usermodel.fonts; + +import java.nio.charset.Charset; +import java.nio.charset.UnsupportedCharsetException; + +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; + +/** + * Charset represents the basic set of characters associated with a font (that it can display), and + * corresponds to the ANSI codepage (8-bit or DBCS) of that character set used by a given language. + * + * @since POI 3.17-beta2 + */ +public enum FontCharset { + /** Specifies the English character set. */ + ANSI(0x00000000, "Cp1252"), + /** + * Specifies a character set based on the current system locale; + * for example, when the system locale is United States English, + * the default character set is ANSI_CHARSET. 
+ */ + DEFAULT(0x00000001, "Cp1252"), + /** Specifies a character set of symbols. */ + SYMBOL(0x00000002, ""), + /** Specifies the Apple Macintosh character set. */ + MAC(0x0000004D, "MacRoman"), + /** Specifies the Japanese character set. */ + SHIFTJIS(0x00000080, "Shift_JIS"), + /** Also spelled "Hangeul". Specifies the Hangul Korean character set. */ + HANGUL(0x00000081, "cp949"), + /** Also spelled "Johap". Specifies the Johab Korean character set. */ + JOHAB(0x00000082, "x-Johab"), + /** Specifies the "simplified" Chinese character set for People's Republic of China. */ + GB2312(0x00000086, "GB2312"), + /** + * Specifies the "traditional" Chinese character set, used mostly in + * Taiwan and in the Hong Kong and Macao Special Administrative Regions. + */ + CHINESEBIG5(0x00000088, "Big5"), + /** Specifies the Greek character set. */ + GREEK(0x000000A1, "Cp1253"), + /** Specifies the Turkish character set. */ + TURKISH(0x000000A2, "Cp1254"), + /** Specifies the Vietnamese character set. */ + VIETNAMESE(0x000000A3, "Cp1258"), + /** Specifies the Hebrew character set. */ + HEBREW(0x000000B1, "Cp1255"), + /** Specifies the Arabic character set. */ + ARABIC(0x000000B2, "Cp1256"), + /** Specifies the Baltic (Northeastern European) character set. */ + BALTIC(0x000000BA, "Cp1257"), + /** Specifies the Russian Cyrillic character set. */ + RUSSIAN(0x000000CC, "Cp1251"), + /** Specifies the Thai character set. */ + THAI_(0x000000DE, "x-windows-874"), + /** Specifies a Eastern European character set. */ + EASTEUROPE(0x000000EE, "Cp1250"), + /** + * Specifies a mapping to one of the OEM code pages, + * according to the current system locale setting. 
+ */ + OEM(0x000000FF, "Cp1252"); + + private static FontCharset[] _table = new FontCharset[256]; + + private int nativeId; + private Charset charset; + + + static { + for (FontCharset c : values()) { + _table[c.getNativeId()] = c; + } + } + + FontCharset(int flag, String javaCharsetName) { + this.nativeId = flag; + if (javaCharsetName.length() > 0) { + try { + charset = Charset.forName(javaCharsetName); + return; + } catch (UnsupportedCharsetException e) { + POILogger logger = POILogFactory.getLogger(FontCharset.class); + logger.log(POILogger.WARN, "Unsupported charset: "+javaCharsetName); + } + } + charset = null; + } + + /** + * + * @return charset for the font or <code>null</code> if there is no matching charset or + * if the charset is a "default" + */ + public Charset getCharset() { + return charset; + } + + public int getNativeId() { + return nativeId; + } + + public static FontCharset valueOf(int value){ + return (value < 0 || value >= _table.length) ? null :_table[value]; + } +}
/**
 * A property of a font that describes its general appearance.
 *
 * @since POI 3.17-beta2
 */
public enum FontFamily {
    /**
     * The default font is specified, which is implementation-dependent.
     */
    FF_DONTCARE (0x00),
    /**
     * Fonts with variable stroke widths, which are proportional to the actual widths of
     * the glyphs, and which have serifs. "MS Serif" is an example.
     */
    FF_ROMAN (0x01),
    /**
     * Fonts with variable stroke widths, which are proportional to the actual widths of the
     * glyphs, and which do not have serifs. "MS Sans Serif" is an example.
     */
    FF_SWISS (0x02),
    /**
     * Fonts with constant stroke width, with or without serifs. Fixed-width fonts are
     * usually modern. "Pica", "Elite", and "Courier New" are examples.
     */
    FF_MODERN (0x03),
    /**
     * Fonts designed to look like handwriting. "Script" and "Cursive" are examples.
     */
    FF_SCRIPT (0x04),
    /**
     * Novelty fonts. "Old English" is an example.
     */
    FF_DECORATIVE (0x05);

    private final int nativeId;

    private FontFamily(int nativeId) {
        this.nativeId = nativeId;
    }

    /**
     * @return the native id of this family, i.e. the family value before it is shifted
     *         into the upper nibble of a combined pitch-and-family byte
     */
    public int getFlag() {
        return nativeId;
    }

    /**
     * Same value as {@link #getFlag()}, offered under the accessor name used by the
     * other font enums of this package.
     *
     * @return the native id of this family
     */
    public int getNativeId() {
        return nativeId;
    }

    /**
     * Finds the family for a native id.
     *
     * @param nativeId the (unshifted) native family id
     * @return the matching family, or {@code null} if no family uses that id
     */
    public static FontFamily valueOf(int nativeId) {
        for (FontFamily ff : values()) {
            if (ff.nativeId == nativeId) {
                return ff;
            }
        }
        return null;
    }

    /**
     * Get FontFamily from combined native id
     *
     * @param pitchAndFamily the combined byte; the family is read from its upper four bits
     * @return the matching family, or {@code null} if the upper nibble matches no family
     */
    public static FontFamily valueOfPitchFamily(byte pitchAndFamily) {
        // widen as unsigned first: a byte with the top bit set would otherwise be
        // sign-extended to int before the shift and yield a value far outside 0..15
        return valueOf((pitchAndFamily & 0xFF) >>> 4);
    }
}
/**
 * Text runs can contain characters which will be handled (if configured) by a different font,
 * because the default font (latin) doesn't contain corresponding glyphs.
 *
 * @since POI 3.17-beta2
 *
 * @see <a href="https://blogs.msdn.microsoft.com/officeinteroperability/2013/04/22/office-open-xml-themes-schemes-and-fonts/">Office Open XML Themes, Schemes, and Fonts</a>
 */
public enum FontGroup {
    /** type for latin charset (default) - also used for unicode fonts like MS Arial Unicode */
    LATIN,
    /** type for east asian charsets - usually set as fallback for the latin font, e.g. something like MS Gothic or MS Mincho */
    EAST_ASIAN,
    /** type for symbol fonts */
    SYMBOL,
    /** type for complex scripts - see https://msdn.microsoft.com/en-us/library/windows/desktop/dd317698 */
    COMPLEX_SCRIPT
    ;


    /**
     * A stretch of consecutive characters of a text run which share the same font group.
     */
    public static class FontGroupRange {
        private int len;
        private FontGroup fontGroup;

        /**
         * @return the length of the stretch, counted in {@code char}s (a supplementary
         *         code point contributes two)
         */
        public int getLength() {
            return len;
        }

        /**
         * @return the font group of the stretch
         */
        public FontGroup getFontGroup( ) {
            return fontGroup;
        }
    }

    /** Value of a {@link #UCS_RANGES} entry: inclusive upper bound of the range and its group. */
    private static class Range {
        final int upper;
        final FontGroup fontGroup;
        Range(int upper, FontGroup fontGroup) {
            this.upper = upper;
            this.fontGroup = fontGroup;
        }
    }

    /** Code point ranges keyed by their inclusive lower bound; everything not listed is EAST_ASIAN. */
    private static final NavigableMap<Integer,Range> UCS_RANGES = new TreeMap<Integer,Range>();

    static {
        addRange(0x0000, 0x007F, LATIN);
        addRange(0x0080, 0x00A6, LATIN);
        addRange(0x00A9, 0x00AF, LATIN);
        addRange(0x00B2, 0x00B3, LATIN);
        addRange(0x00B5, 0x00D6, LATIN);
        addRange(0x00D8, 0x00F6, LATIN);
        addRange(0x00F8, 0x058F, LATIN);
        addRange(0x0590, 0x074F, COMPLEX_SCRIPT);
        addRange(0x0780, 0x07BF, COMPLEX_SCRIPT);
        addRange(0x0900, 0x109F, COMPLEX_SCRIPT);
        addRange(0x10A0, 0x10FF, LATIN);
        addRange(0x1200, 0x137F, LATIN);
        addRange(0x13A0, 0x177F, LATIN);
        addRange(0x1780, 0x18AF, COMPLEX_SCRIPT);
        addRange(0x1D00, 0x1D7F, LATIN);
        addRange(0x1E00, 0x1FFF, LATIN);
        addRange(0x2000, 0x200B, LATIN);
        addRange(0x200C, 0x200F, COMPLEX_SCRIPT);
        // For the quote characters in the range U+2018 - U+201E, use the East Asian font
        // if the text has one of the following language identifiers:
        // ii-CN, ja-JP, ko-KR, zh-CN,zh-HK, zh-MO, zh-SG, zh-TW
        addRange(0x2010, 0x2029, LATIN);
        addRange(0x202A, 0x202F, COMPLEX_SCRIPT);
        addRange(0x2030, 0x2046, LATIN);
        addRange(0x204A, 0x245F, LATIN);
        addRange(0x2670, 0x2671, COMPLEX_SCRIPT);
        addRange(0x27C0, 0x2BFF, LATIN);
        addRange(0x3099, 0x309A, EAST_ASIAN);
        // 0xD835 is a high surrogate. Lookups are done on code points, so this entry only
        // matches an unpaired 0xD835 ...
        addRange(0xD835, 0xD835, LATIN);
        addRange(0xF000, 0xF0FF, SYMBOL);
        addRange(0xFB00, 0xFB17, LATIN);
        addRange(0xFB1D, 0xFB4F, COMPLEX_SCRIPT);
        addRange(0xFE50, 0xFE6F, LATIN);
        // ... while a well-formed pair starting with 0xD835 is decoded by codePointAt() to
        // U+1D400 - U+1D7FF, which therefore needs its own entry to get the same group
        addRange(0x1D400, 0x1D7FF, LATIN);
        // All others EAST_ASIAN
    }

    /** Registers the inclusive code point range {@code lower..upper} for the given group. */
    private static void addRange(int lower, int upper, FontGroup fontGroup) {
        UCS_RANGES.put(lower, new Range(upper, fontGroup));
    }


    /**
     * Splits the text into consecutive stretches of characters belonging to the same font
     * group. The group of each character is guessed from its code point; space, line feed
     * and carriage return don't start a new stretch but are added to the preceding one.
     *
     * @param runText the text which font groups are to be analyzed, may be {@code null}
     * @return the stretches in text order; empty if the text is {@code null} or empty
     */
    public static List<FontGroupRange> getFontGroupRanges(String runText) {
        List<FontGroupRange> ttrList = new ArrayList<FontGroupRange>();
        FontGroupRange ttrLast = null;
        final int rlen = (runText != null) ? runText.length() : 0;
        for(int cp, i = 0, charCount; i < rlen; i += charCount) {
            cp = runText.codePointAt(i);
            charCount = Character.charCount(cp);

            // don't switch the font group for a few default characters supposedly available in all fonts
            FontGroup tt;
            if (ttrLast != null && " \n\r".indexOf(cp) > -1) {
                tt = ttrLast.fontGroup;
            } else {
                tt = lookup(cp);
            }

            if (ttrLast == null || ttrLast.fontGroup != tt) {
                ttrLast = new FontGroupRange();
                ttrLast.fontGroup = tt;
                ttrList.add(ttrLast);
            }
            ttrLast.len += charCount;
        }
        return ttrList;
    }

    /**
     * Guesses the font group of the first code point of the text.
     *
     * @param runText the text to be analyzed, may be {@code null}
     * @return the font group of the first code point, or {@link #LATIN} if the text is
     *         {@code null} or empty
     */
    public static FontGroup getFontGroupFirst(String runText) {
        return (runText == null || runText.isEmpty()) ? LATIN : lookup(runText.codePointAt(0));
    }

    /**
     * @return the group of the range containing the code point, or {@link #EAST_ASIAN}
     *         if no range contains it
     */
    private static FontGroup lookup(int codepoint) {
        // the only range which can contain the code point is the one with the
        // greatest lower bound not exceeding it
        Map.Entry<Integer,Range> entry = UCS_RANGES.floorEntry(codepoint);
        Range range = (entry != null) ? entry.getValue() : null;
        return (range != null && codepoint <= range.upper) ? range.fontGroup : EAST_ASIAN;
    }
}
\ No newline at end of file diff --git a/src/java/org/apache/poi/common/usermodel/fonts/FontInfo.java b/src/java/org/apache/poi/common/usermodel/fonts/FontInfo.java new file mode 100644 index 0000000000..ecb5a69687 --- /dev/null +++ b/src/java/org/apache/poi/common/usermodel/fonts/FontInfo.java @@ -0,0 +1,102 @@ +/* ==================================================================== +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==================================================================== */ + +package org.apache.poi.common.usermodel.fonts; + +/** + * A FontInfo object holds information about a font configuration. + * It is roughly an equivalent to the LOGFONT structure in Windows GDI.<p> + * + * If an implementation doesn't provide a property, the getter will return {@code null} - + * if the value is unset, a default value will be returned.<p> + * + * Setting a unsupported property results in an {@link UnsupportedOperationException}. 
+ * + * @since POI 3.17-beta2 + * + * @see <a href="https://msdn.microsoft.com/en-us/library/dd145037.aspx">LOGFONT structure</a> + */ +public interface FontInfo { + + /** + * Get the index within the collection of Font objects + * @return unique index number of the underlying record this Font represents + * (probably you don't care unless you're comparing which one is which) + */ + Integer getIndex(); + + /** + * Sets the index within the collection of Font objects + * + * @param index the index within the collection of Font objects + * + * @throws UnsupportedOperationException if unsupported + */ + void setIndex(int index); + + + /** + * @return the full name of the font, i.e. font family + type face + */ + String getTypeface(); + + /** + * Sets the font name + * + * @param typeface the full name of the font, when {@code null} removes the font definition - + * removal is implementation specific + */ + void setTypeface(String typeface); + + /** + * @return the font charset + */ + FontCharset getCharset(); + + /** + * Sets the charset + * + * @param charset the charset + */ + void setCharset(FontCharset charset); + + /** + * @return the family class + */ + FontFamily getFamily(); + + /** + * Sets the font family class + * + * @param family the font family class + */ + void setFamily(FontFamily family); + + /** + * @return the font pitch or {@code null} if unsupported + */ + FontPitch getPitch(); + + /** + * Set the font pitch + * + * @param pitch the font pitch + * + * @throws UnsupportedOperationException if unsupported + */ + void setPitch(FontPitch pitch); +}
\ No newline at end of file diff --git a/src/java/org/apache/poi/common/usermodel/fonts/FontPitch.java b/src/java/org/apache/poi/common/usermodel/fonts/FontPitch.java new file mode 100644 index 0000000000..78c6533944 --- /dev/null +++ b/src/java/org/apache/poi/common/usermodel/fonts/FontPitch.java @@ -0,0 +1,74 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.common.usermodel.fonts; + +/** + * A property of a font that describes the pitch, of the characters. + * + * @since POI 3.17-beta2 + */ +public enum FontPitch { + /** + * The default pitch, which is implementation-dependent. + */ + DEFAULT (0x00), + /** + * A fixed pitch, which means that all the characters in the font occupy the same + * width when output in a string. + */ + FIXED (0x01), + /** + * A variable pitch, which means that the characters in the font occupy widths + * that are proportional to the actual widths of the glyphs when output in a string. For example, + * the "i" and space characters usually have much smaller widths than a "W" or "O" character. 
+ */ + VARIABLE (0x02); + + private int nativeId; + FontPitch(int nativeId) { + this.nativeId = nativeId; + } + + public int getNativeId() { + return nativeId; + } + + public static FontPitch valueOf(int flag) { + for (FontPitch fp : values()) { + if (fp.nativeId == flag) return fp; + } + return null; + } + + /** + * Combine pitch and family to native id + * + * @see <a href="https://msdn.microsoft.com/en-us/library/dd145037.aspx">LOGFONT structure</a> + */ + public static byte getNativeId(FontPitch pitch, FontFamily family) { + return (byte)(pitch.getNativeId() | (family.getFlag() << 4)); + } + + /** + * Get FontPitch from native id + */ + public static FontPitch valueOfPitchFamily(byte pitchAndFamily) { + return valueOf(pitchAndFamily & 0x3); + } +} + |