import java.awt.geom.AffineTransform;\r
import java.awt.geom.Rectangle2D;\r
import java.awt.image.BufferedImage;\r
+import java.nio.charset.Charset;\r
import java.text.AttributedString;\r
import java.util.ArrayList;\r
import java.util.LinkedList;\r
import org.apache.poi.sl.draw.DrawFactory;\r
import org.apache.poi.sl.draw.DrawFontManager;\r
import org.apache.poi.sl.draw.Drawable;\r
+import org.apache.poi.util.LocaleUtil;\r
\r
public class HwmfGraphics {\r
+\r
+ private static final Charset DEFAULT_CHARSET = LocaleUtil.CHARSET_1252;\r
private final Graphics2D graphicsCtx;\r
private final List<HwmfDrawProperties> propStack = new LinkedList<HwmfDrawProperties>();\r
private HwmfDrawProperties prop = new HwmfDrawProperties();\r
break;\r
}\r
}\r
- \r
+\r
+ /**\r
+ * Draws the given text without a dx array by delegating to\r
+ * {@link #drawString(String, Rectangle2D, int[])}.\r
+ *\r
+ * @param text the text to draw\r
+ * @param bounds forwarded unchanged to the three-argument overload\r
+ * @deprecated use {@link #drawString(byte[], Rectangle2D)}\r
+ */\r
+ @Deprecated\r
public void drawString(String text, Rectangle2D bounds) {\r
drawString(text, bounds, null);\r
}\r
- \r
+\r
+ /**\r
+ * Draws the given encoded text without a dx array; this simply calls\r
+ * {@link #drawString(byte[], Rectangle2D, int[])} with <code>null</code> as dx.\r
+ *\r
+ * @param text the raw text bytes, forwarded unchanged\r
+ * @param bounds forwarded unchanged to the three-argument overload\r
+ */ public void drawString(byte[] text, Rectangle2D bounds) {\r
+ drawString(text, bounds, null);\r
+ }\r
+\r
+ /**\r
+ * Draws the given text by encoding it with <code>DEFAULT_CHARSET</code> and delegating to\r
+ * {@link #drawString(byte[], Rectangle2D, int[])}. A <code>null</code> text is ignored,\r
+ * which is how this method behaved before the byte-based overload was introduced.\r
+ *\r
+ * @param text the text to draw, may be <code>null</code>\r
+ * @param bounds forwarded unchanged to the byte-based overload\r
+ * @param dx forwarded unchanged to the byte-based overload, may be <code>null</code>\r
+ * @deprecated use {@link #drawString(byte[], Rectangle2D, int[])}\r
+ */\r
+ @Deprecated\r
public void drawString(String text, Rectangle2D bounds, int dx[]) {\r
+ if (text == null) {\r
+ // getBytes() on null would throw a NullPointerException; the byte-based\r
+ // overload treats null as "nothing to draw", so keep that contract here\r
+ return;\r
+ }\r
+ drawString(text.getBytes(DEFAULT_CHARSET), bounds, dx);\r
+ }\r
+\r
+ public void drawString(byte[] text, Rectangle2D bounds, int dx[]) {\r
HwmfFont font = prop.getFont();\r
- if (font == null || text == null || text.isEmpty()) {\r
+ if (font == null || text == null || text.length == 0) {\r
return;\r
}\r
\r
// TODO: another approx. ...\r
double fontW = fontH/1.8;\r
\r
- int len = text.length();\r
- AttributedString as = new AttributedString(text);\r
+ int len = text.length;\r
+ Charset charset = (font.getCharSet().getCharset() == null)?\r
+ DEFAULT_CHARSET : font.getCharSet().getCharset();\r
+ String textString = new String(text, charset);\r
+ AttributedString as = new AttributedString(textString);\r
if (dx == null || dx.length == 0) {\r
addAttributes(as, font);\r
} else {\r
\r
import java.io.IOException;\r
import java.nio.charset.Charset;\r
+import java.nio.charset.UnsupportedCharsetException;\r
\r
import org.apache.poi.util.LittleEndianConsts;\r
import org.apache.poi.util.LittleEndianInputStream;\r
+import org.apache.poi.util.POILogFactory;\r
+import org.apache.poi.util.POILogger;\r
\r
/**\r
* The Font object specifies the attributes of a logical font\r
*/\r
public class HwmfFont {\r
+\r
+ private static final POILogger logger = POILogFactory.getLogger(HwmfFont.class);\r
+\r
public enum WmfCharset {\r
/** Specifies the English character set. */\r
- ANSI_CHARSET(0x00000000),\r
+ ANSI_CHARSET(0x00000000, "Cp1252"),\r
/**\r
* Specifies a character set based on the current system locale;\r
* for example, when the system locale is United States English,\r
* the default character set is ANSI_CHARSET.\r
*/\r
- DEFAULT_CHARSET(0x00000001),\r
+ DEFAULT_CHARSET(0x00000001, "Cp1252"),\r
/** Specifies a character set of symbols. */\r
- SYMBOL_CHARSET(0x00000002),\r
+ SYMBOL_CHARSET(0x00000002, ""),\r
/** Specifies the Apple Macintosh character set. */\r
- MAC_CHARSET(0x0000004D),\r
+ MAC_CHARSET(0x0000004D, "MacRoman"),\r
/** Specifies the Japanese character set. */\r
- SHIFTJIS_CHARSET(0x00000080),\r
+ SHIFTJIS_CHARSET(0x00000080, "Shift_JIS"),\r
/** Also spelled "Hangeul". Specifies the Hangul Korean character set. */\r
- HANGUL_CHARSET(0x00000081),\r
+ HANGUL_CHARSET(0x00000081, "cp949"),\r
/** Also spelled "Johap". Specifies the Johab Korean character set. */\r
- JOHAB_CHARSET(0x00000082),\r
+ JOHAB_CHARSET(0x00000082, "x-Johab"),\r
/** Specifies the "simplified" Chinese character set for People's Republic of China. */\r
- GB2312_CHARSET(0x00000086),\r
+ GB2312_CHARSET(0x00000086, "GB2312"),\r
/**\r
* Specifies the "traditional" Chinese character set, used mostly in\r
* Taiwan and in the Hong Kong and Macao Special Administrative Regions.\r
*/\r
- CHINESEBIG5_CHARSET(0x00000088),\r
+ CHINESEBIG5_CHARSET(0x00000088, "Big5"),\r
/** Specifies the Greek character set. */\r
- GREEK_CHARSET(0x000000A1),\r
+ GREEK_CHARSET(0x000000A1, "Cp1253"),\r
/** Specifies the Turkish character set. */\r
- TURKISH_CHARSET(0x000000A2),\r
+ TURKISH_CHARSET(0x000000A2, "Cp1254"),\r
/** Specifies the Vietnamese character set. */\r
- VIETNAMESE_CHARSET(0x000000A3),\r
+ VIETNAMESE_CHARSET(0x000000A3, "Cp1258"),\r
/** Specifies the Hebrew character set. */\r
- HEBREW_CHARSET(0x000000B1),\r
+ HEBREW_CHARSET(0x000000B1, "Cp1255"),\r
/** Specifies the Arabic character set. */\r
- ARABIC_CHARSET(0x000000B2),\r
+ ARABIC_CHARSET(0x000000B2, "Cp1256"),\r
/** Specifies the Baltic (Northeastern European) character set. */\r
- BALTIC_CHARSET(0x000000BA),\r
+ BALTIC_CHARSET(0x000000BA, "Cp1257"),\r
/** Specifies the Russian Cyrillic character set. */\r
- RUSSIAN_CHARSET(0x000000CC),\r
+ RUSSIAN_CHARSET(0x000000CC, "Cp1251"),\r
/** Specifies the Thai character set. */\r
- THAI_CHARSET(0x000000DE),\r
+ THAI_CHARSET(0x000000DE, "x-windows-874"),\r
/** Specifies a Eastern European character set. */\r
- EASTEUROPE_CHARSET(0x000000EE),\r
+ EASTEUROPE_CHARSET(0x000000EE, "Cp1250"),\r
/**\r
* Specifies a mapping to one of the OEM code pages,\r
* according to the current system locale setting.\r
*/\r
- OEM_CHARSET(0x000000FF);\r
+ OEM_CHARSET(0x000000FF, "Cp1252");\r
\r
int flag;\r
- WmfCharset(int flag) {\r
+ Charset charset;\r
+\r
+ /**\r
+ * Stores the WMF flag and tries to resolve the Java charset with the given name.\r
+ * An empty name leaves <code>charset</code> as <code>null</code>; a name the running JVM\r
+ * does not support is logged as a warning and also leaves it <code>null</code>,\r
+ * so an {@link UnsupportedCharsetException} never escapes this constructor.\r
+ *\r
+ * @param flag the numeric value identifying this WMF charset\r
+ * @param javaCharsetName the Java charset name, or an empty string if there is none\r
+ */ WmfCharset(int flag, String javaCharsetName) {\r
this.flag = flag;\r
+ if (javaCharsetName.length() > 0) {\r
+ try {\r
+ charset = Charset.forName(javaCharsetName);\r
+ return;\r
+ } catch (UnsupportedCharsetException e) {\r
+ logger.log(POILogger.WARN, "Unsupported charset: "+javaCharsetName);\r
+ }\r
+ }\r
+ charset = null;\r
+ }\r
+\r
+ /**\r
+ * Returns the Java charset that was resolved for this WMF charset in the constructor.\r
+ *\r
+ * @return the charset for the font, or <code>null</code> if this constant was declared\r
+ * with an empty Java charset name (as SYMBOL_CHARSET is) or if the running JVM does not\r
+ * support the declared name. Note that DEFAULT_CHARSET and OEM_CHARSET are declared\r
+ * with "Cp1252" and therefore do not yield <code>null</code> for being a "default".\r
+ */\r
+ public Charset getCharset() {\r
+ return charset;\r
}\r
\r
static WmfCharset valueOf(int flag) {\r
\r
import java.awt.geom.Rectangle2D;\r
import java.io.IOException;\r
+import java.nio.charset.Charset;\r
\r
import org.apache.poi.hwmf.draw.HwmfDrawProperties;\r
import org.apache.poi.hwmf.draw.HwmfGraphics;\r
import org.apache.poi.util.BitFieldFactory;\r
import org.apache.poi.util.LittleEndianConsts;\r
import org.apache.poi.util.LittleEndianInputStream;\r
-import org.apache.poi.util.LocaleUtil;\r
import org.apache.poi.util.POILogFactory;\r
import org.apache.poi.util.POILogger;\r
\r
* length of the string.\r
* The string is written at the location specified by the XStart and YStart fields.\r
*/\r
- private String text;\r
+ private byte[] rawTextBytes;\r
/**\r
* A 16-bit signed integer that defines the vertical (y-axis) coordinate, in logical\r
* units, of the point where drawing is to start.\r
/**\r
 * Reads this record from the stream: the string length, the text bytes (the buffer is\r
 * sized up to an even number of bytes), then yStart and xStart. The text is kept as\r
 * raw bytes and is not decoded here.\r
 *\r
 * @return the number of bytes read, i.e. three shorts plus the length of the text buffer\r
 * @throws IOException if reading from the stream fails\r
 */ @Override\r
public int init(LittleEndianInputStream leis, long recordSize, int recordFunction) throws IOException {\r
stringLength = leis.readShort();\r
- byte buf[] = new byte[stringLength+(stringLength&1)];\r
- leis.readFully(buf);\r
- text = new String(buf, 0, stringLength, LocaleUtil.CHARSET_1252).trim();\r
+ rawTextBytes = new byte[stringLength+(stringLength&1)];\r
+ leis.readFully(rawTextBytes);\r
yStart = leis.readShort();\r
xStart = leis.readShort();\r
- return 3*LittleEndianConsts.SHORT_SIZE+buf.length;\r
+ return 3*LittleEndianConsts.SHORT_SIZE+rawTextBytes.length;\r
}\r
\r
/**\r
 * Hands the unpadded text bytes of this record to the graphics context, together with\r
 * a zero-sized rectangle positioned at (xStart, yStart).\r
 *\r
 * @param ctx the graphics context whose drawString method is called\r
 */ @Override\r
public void draw(HwmfGraphics ctx) {\r
Rectangle2D bounds = new Rectangle2D.Double(xStart, yStart, 0, 0);\r
- ctx.drawString(text, bounds);\r
+ ctx.drawString(getTextBytes(), bounds);\r
+ }\r
+\r
+ /**\r
+ * Decodes the unpadded text bytes of this record with the given charset.\r
+ *\r
+ * @param charset the charset used to decode the bytes\r
+ * @return the decoded text\r
+ */ public String getText(Charset charset) {\r
+ return new String(getTextBytes(), charset);\r
+ }\r
+\r
+ /**\r
+ * Copies the text portion out of the buffer that was read from the record.\r
+ *\r
+ * @return a newly allocated array holding the first stringLength bytes of rawTextBytes\r
+ * (index 0 inclusive to stringLength exclusive).\r
+ * The optional padding byte at the end of rawTextBytes is not included.\r
+ */\r
+ private byte[] getTextBytes() {\r
+ byte[] ret = new byte[stringLength];\r
+ System.arraycopy(rawTextBytes, 0, ret, 0, stringLength);\r
+ return ret;\r
}\r
}\r
\r
* the length is odd, an extra byte is placed after it so that the following member (optional Dx) is \r
* aligned on a 16-bit boundary.\r
*/\r
- private String text;\r
+ private byte[] rawTextBytes;\r
/**\r
* An optional array of 16-bit signed integers that indicate the distance between \r
* origins of adjacent character cells. For example, Dx[i] logical units separate the origins of \r
size += 4*LittleEndianConsts.SHORT_SIZE;\r
}\r
\r
- byte buf[] = new byte[stringLength+(stringLength&1)];\r
- leis.readFully(buf);\r
- text = new String(buf, 0, stringLength, LocaleUtil.CHARSET_1252);\r
- size += buf.length;\r
+ rawTextBytes = new byte[stringLength+(stringLength&1)];\r
+ leis.readFully(rawTextBytes);\r
+ size += rawTextBytes.length;\r
\r
if (size >= remainingRecordSize) {\r
logger.log(POILogger.INFO, "META_EXTTEXTOUT doesn't contain character tracking info");\r
/**\r
 * Hands the unpadded text bytes of this record and its dx array to the graphics\r
 * context, together with a zero-sized rectangle positioned at (x, y).\r
 *\r
 * @param ctx the graphics context whose drawString method is called\r
 */ @Override\r
public void draw(HwmfGraphics ctx) {\r
Rectangle2D bounds = new Rectangle2D.Double(x, y, 0, 0);\r
- ctx.drawString(text, bounds, dx);\r
+ ctx.drawString(getTextBytes(), bounds, dx);\r
+ }\r
+\r
+ /**\r
+ * Decodes the unpadded text bytes of this record with the given charset.\r
+ *\r
+ * @param charset the charset used to decode the bytes\r
+ * @return the decoded text\r
+ */ public String getText(Charset charset) {\r
+ return new String(getTextBytes(), charset);\r
+ }\r
+\r
+ /**\r
+ * Copies the text portion out of the buffer that was read from the record.\r
+ *\r
+ * @return a newly allocated array holding the first stringLength bytes of rawTextBytes\r
+ * (index 0 inclusive to stringLength exclusive).\r
+ * The optional padding byte at the end of rawTextBytes is not included.\r
+ */\r
+ private byte[] getTextBytes() {\r
+ byte[] ret = new byte[stringLength];\r
+ System.arraycopy(rawTextBytes, 0, ret, 0, stringLength);\r
+ return ret;\r
}\r
}\r
\r
/**\r
 * Sets the font of this record on the properties of the given graphics context.\r
 *\r
 * @param ctx the graphics context whose properties receive the font\r
 */ public void applyObject(HwmfGraphics ctx) {\r
ctx.getProperties().setFont(font);\r
}\r
+\r
+ /**\r
+ * @return the font held by this record, i.e. the same object that\r
+ * applyObject sets on the graphics properties\r
+ */ public HwmfFont getFont() {\r
+ return font;\r
+ }\r
}\r
}\r
package org.apache.poi.hwmf;\r
\r
import static org.junit.Assert.assertEquals;\r
+import static org.junit.Assert.assertTrue;\r
\r
+import javax.imageio.ImageIO;\r
import java.awt.Dimension;\r
import java.awt.Graphics2D;\r
import java.awt.RenderingHints;\r
import java.io.FilterInputStream;\r
import java.io.IOException;\r
import java.net.URL;\r
+import java.nio.charset.Charset;\r
import java.util.List;\r
import java.util.Locale;\r
import java.util.zip.ZipEntry;\r
import java.util.zip.ZipInputStream;\r
\r
-import javax.imageio.ImageIO;\r
-\r
import org.apache.poi.POIDataSamples;\r
import org.apache.poi.hwmf.record.HwmfFill.HwmfImageRecord;\r
+import org.apache.poi.hwmf.record.HwmfFont;\r
import org.apache.poi.hwmf.record.HwmfRecord;\r
+import org.apache.poi.hwmf.record.HwmfRecordType;\r
+import org.apache.poi.hwmf.record.HwmfText;\r
import org.apache.poi.hwmf.usermodel.HwmfPicture;\r
import org.apache.poi.sl.usermodel.PictureData;\r
import org.apache.poi.sl.usermodel.PictureData.PictureType;\r
import org.apache.poi.sl.usermodel.SlideShow;\r
import org.apache.poi.sl.usermodel.SlideShowFactory;\r
+import org.apache.poi.util.LocaleUtil;\r
import org.apache.poi.util.Units;\r
import org.junit.Ignore;\r
import org.junit.Test;\r
}\r
}\r
}\r
+\r
+ /**\r
+ * Extracts the text of all extTextOut records of a Cyrillic sample file, decoding each\r
+ * with the charset of the most recently seen createFontIndirect record (falling back to\r
+ * Cp1252), and checks that two expected Cyrillic words are present.\r
+ * Currently ignored because the sample file is not part of the test data.\r
+ */\r
+ @Test\r
+ @Ignore("If we decide we can use common crawl file specified, we can turn this back on")\r
+ public void testCyrillic() throws Exception {\r
+ //TODO: move test file to framework and fix this\r
+ File dir = new File("C:/somethingOrOther");\r
+ File f = new File(dir, "ZMLH54SPLI76NQ7XMKVB7SMUJA2HTXTS-2.wmf");\r
+ FileInputStream fis = new FileInputStream(f);\r
+ try {\r
+ HwmfPicture wmf = new HwmfPicture(fis);\r
+\r
+ Charset charset = LocaleUtil.CHARSET_1252;\r
+ StringBuilder sb = new StringBuilder();\r
+ //this is pure hackery for specifying the font\r
+ //this happens to work on this test file, but you need to\r
+ //do what Graphics does by maintaining the stack, etc.!\r
+ for (HwmfRecord r : wmf.getRecords()) {\r
+ if (r.getRecordType().equals(HwmfRecordType.createFontIndirect)) {\r
+ HwmfFont font = ((HwmfText.WmfCreateFontIndirect)r).getFont();\r
+ //look the charset up once instead of twice\r
+ Charset fontCharset = font.getCharSet().getCharset();\r
+ charset = (fontCharset == null) ? LocaleUtil.CHARSET_1252 : fontCharset;\r
+ }\r
+ if (r.getRecordType().equals(HwmfRecordType.extTextOut)) {\r
+ HwmfText.WmfExtTextOut textOut = (HwmfText.WmfExtTextOut)r;\r
+ sb.append(textOut.getText(charset)).append("\n");\r
+ }\r
+ }\r
+ String txt = sb.toString();\r
+ assertTrue(txt.contains("\u041E\u0431\u0449\u043E"));\r
+ assertTrue(txt.contains("\u0411\u0430\u043B\u0430\u043D\u0441"));\r
+ } finally {\r
+ //release the file handle even if parsing or an assertion fails\r
+ fis.close();\r
+ }\r
+ }\r
+\r
}\r