git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1779519 13f79535-47bb-0310-9956-ffa450edef68
pull/26/head
@@ -29,6 +29,7 @@ import java.awt.font.TextAttribute; | |||
import java.awt.geom.AffineTransform; | |||
import java.awt.geom.Rectangle2D; | |||
import java.awt.image.BufferedImage; | |||
import java.nio.charset.Charset; | |||
import java.text.AttributedString; | |||
import java.util.ArrayList; | |||
import java.util.LinkedList; | |||
@@ -48,8 +49,11 @@ import org.apache.poi.hwmf.record.HwmfPenStyle.HwmfLineDash; | |||
import org.apache.poi.sl.draw.DrawFactory; | |||
import org.apache.poi.sl.draw.DrawFontManager; | |||
import org.apache.poi.sl.draw.Drawable; | |||
import org.apache.poi.util.LocaleUtil; | |||
public class HwmfGraphics { | |||
private static final Charset DEFAULT_CHARSET = LocaleUtil.CHARSET_1252; | |||
private final Graphics2D graphicsCtx; | |||
private final List<HwmfDrawProperties> propStack = new LinkedList<HwmfDrawProperties>(); | |||
private HwmfDrawProperties prop = new HwmfDrawProperties(); | |||
@@ -311,14 +315,34 @@ public class HwmfGraphics { | |||
break; | |||
} | |||
} | |||
/** | |||
* | |||
* @param text | |||
* @param bounds | |||
* @deprecated use {@link #drawString(byte[], Rectangle2D)} | |||
*/ | |||
public void drawString(String text, Rectangle2D bounds) { | |||
drawString(text, bounds, null); | |||
} | |||
public void drawString(byte[] text, Rectangle2D bounds) { | |||
drawString(text, bounds, null); | |||
} | |||
/** | |||
* | |||
* @param text | |||
* @param bounds | |||
* @deprecated use {@link #drawString(byte[], Rectangle2D, int[])} | |||
*/ | |||
public void drawString(String text, Rectangle2D bounds, int dx[]) { | |||
drawString(text.getBytes(DEFAULT_CHARSET), bounds, dx); | |||
} | |||
public void drawString(byte[] text, Rectangle2D bounds, int dx[]) { | |||
HwmfFont font = prop.getFont(); | |||
if (font == null || text == null || text.isEmpty()) { | |||
if (font == null || text == null || text.length == 0) { | |||
return; | |||
} | |||
@@ -326,8 +350,11 @@ public class HwmfGraphics { | |||
// TODO: another approx. ... | |||
double fontW = fontH/1.8; | |||
int len = text.length(); | |||
AttributedString as = new AttributedString(text); | |||
int len = text.length; | |||
Charset charset = (font.getCharSet().getCharset() == null)? | |||
DEFAULT_CHARSET : font.getCharSet().getCharset(); | |||
String textString = new String(text, charset); | |||
AttributedString as = new AttributedString(textString); | |||
if (dx == null || dx.length == 0) { | |||
addAttributes(as, font); | |||
} else { |
@@ -19,67 +19,93 @@ package org.apache.poi.hwmf.record; | |||
import java.io.IOException; | |||
import java.nio.charset.Charset; | |||
import java.nio.charset.UnsupportedCharsetException; | |||
import org.apache.poi.util.LittleEndianConsts; | |||
import org.apache.poi.util.LittleEndianInputStream; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
/** | |||
* The Font object specifies the attributes of a logical font | |||
*/ | |||
public class HwmfFont { | |||
private static final POILogger logger = POILogFactory.getLogger(HwmfFont.class); | |||
public enum WmfCharset { | |||
/** Specifies the English character set. */ | |||
ANSI_CHARSET(0x00000000), | |||
ANSI_CHARSET(0x00000000, "Cp1252"), | |||
/** | |||
* Specifies a character set based on the current system locale; | |||
* for example, when the system locale is United States English, | |||
* the default character set is ANSI_CHARSET. | |||
*/ | |||
DEFAULT_CHARSET(0x00000001), | |||
DEFAULT_CHARSET(0x00000001, "Cp1252"), | |||
/** Specifies a character set of symbols. */ | |||
SYMBOL_CHARSET(0x00000002), | |||
SYMBOL_CHARSET(0x00000002, ""), | |||
/** Specifies the Apple Macintosh character set. */ | |||
MAC_CHARSET(0x0000004D), | |||
MAC_CHARSET(0x0000004D, "MacRoman"), | |||
/** Specifies the Japanese character set. */ | |||
SHIFTJIS_CHARSET(0x00000080), | |||
SHIFTJIS_CHARSET(0x00000080, "Shift_JIS"), | |||
/** Also spelled "Hangeul". Specifies the Hangul Korean character set. */ | |||
HANGUL_CHARSET(0x00000081), | |||
HANGUL_CHARSET(0x00000081, "cp949"), | |||
/** Also spelled "Johap". Specifies the Johab Korean character set. */ | |||
JOHAB_CHARSET(0x00000082), | |||
JOHAB_CHARSET(0x00000082, "x-Johab"), | |||
/** Specifies the "simplified" Chinese character set for People's Republic of China. */ | |||
GB2312_CHARSET(0x00000086), | |||
GB2312_CHARSET(0x00000086, "GB2312"), | |||
/** | |||
* Specifies the "traditional" Chinese character set, used mostly in | |||
* Taiwan and in the Hong Kong and Macao Special Administrative Regions. | |||
*/ | |||
CHINESEBIG5_CHARSET(0x00000088), | |||
CHINESEBIG5_CHARSET(0x00000088, "Big5"), | |||
/** Specifies the Greek character set. */ | |||
GREEK_CHARSET(0x000000A1), | |||
GREEK_CHARSET(0x000000A1, "Cp1253"), | |||
/** Specifies the Turkish character set. */ | |||
TURKISH_CHARSET(0x000000A2), | |||
TURKISH_CHARSET(0x000000A2, "Cp1254"), | |||
/** Specifies the Vietnamese character set. */ | |||
VIETNAMESE_CHARSET(0x000000A3), | |||
VIETNAMESE_CHARSET(0x000000A3, "Cp1258"), | |||
/** Specifies the Hebrew character set. */ | |||
HEBREW_CHARSET(0x000000B1), | |||
HEBREW_CHARSET(0x000000B1, "Cp1255"), | |||
/** Specifies the Arabic character set. */ | |||
ARABIC_CHARSET(0x000000B2), | |||
ARABIC_CHARSET(0x000000B2, "Cp1256"), | |||
/** Specifies the Baltic (Northeastern European) character set. */ | |||
BALTIC_CHARSET(0x000000BA), | |||
BALTIC_CHARSET(0x000000BA, "Cp1257"), | |||
/** Specifies the Russian Cyrillic character set. */ | |||
RUSSIAN_CHARSET(0x000000CC), | |||
RUSSIAN_CHARSET(0x000000CC, "Cp1251"), | |||
/** Specifies the Thai character set. */ | |||
THAI_CHARSET(0x000000DE), | |||
THAI_CHARSET(0x000000DE, "x-windows-874"), | |||
/** Specifies an Eastern European character set. */ | |||
EASTEUROPE_CHARSET(0x000000EE), | |||
EASTEUROPE_CHARSET(0x000000EE, "Cp1250"), | |||
/** | |||
* Specifies a mapping to one of the OEM code pages, | |||
* according to the current system locale setting. | |||
*/ | |||
OEM_CHARSET(0x000000FF); | |||
OEM_CHARSET(0x000000FF, "Cp1252"); | |||
int flag; | |||
WmfCharset(int flag) { | |||
Charset charset; | |||
WmfCharset(int flag, String javaCharsetName) { | |||
this.flag = flag; | |||
if (javaCharsetName.length() > 0) { | |||
try { | |||
charset = Charset.forName(javaCharsetName); | |||
return; | |||
} catch (UnsupportedCharsetException e) { | |||
logger.log(POILogger.WARN, "Unsupported charset: "+javaCharsetName); | |||
} | |||
} | |||
charset = null; | |||
} | |||
/** | |||
* | |||
* @return charset for the font or <code>null</code> if there is no matching charset or | |||
* if the charset is a "default" | |||
*/ | |||
public Charset getCharset() { | |||
return charset; | |||
} | |||
static WmfCharset valueOf(int flag) { |
@@ -19,6 +19,7 @@ package org.apache.poi.hwmf.record; | |||
import java.awt.geom.Rectangle2D; | |||
import java.io.IOException; | |||
import java.nio.charset.Charset; | |||
import org.apache.poi.hwmf.draw.HwmfDrawProperties; | |||
import org.apache.poi.hwmf.draw.HwmfGraphics; | |||
@@ -27,7 +28,6 @@ import org.apache.poi.util.BitField; | |||
import org.apache.poi.util.BitFieldFactory; | |||
import org.apache.poi.util.LittleEndianConsts; | |||
import org.apache.poi.util.LittleEndianInputStream; | |||
import org.apache.poi.util.LocaleUtil; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
@@ -144,7 +144,7 @@ public class HwmfText { | |||
* length of the string. | |||
* The string is written at the location specified by the XStart and YStart fields. | |||
*/ | |||
private String text; | |||
private byte[] rawTextBytes; | |||
/** | |||
* A 16-bit signed integer that defines the vertical (y-axis) coordinate, in logical | |||
* units, of the point where drawing is to start. | |||
@@ -164,18 +164,33 @@ public class HwmfText { | |||
@Override | |||
public int init(LittleEndianInputStream leis, long recordSize, int recordFunction) throws IOException { | |||
stringLength = leis.readShort(); | |||
byte buf[] = new byte[stringLength+(stringLength&1)]; | |||
leis.readFully(buf); | |||
text = new String(buf, 0, stringLength, LocaleUtil.CHARSET_1252).trim(); | |||
rawTextBytes = new byte[stringLength+(stringLength&1)]; | |||
leis.readFully(rawTextBytes); | |||
yStart = leis.readShort(); | |||
xStart = leis.readShort(); | |||
return 3*LittleEndianConsts.SHORT_SIZE+buf.length; | |||
return 3*LittleEndianConsts.SHORT_SIZE+rawTextBytes.length; | |||
} | |||
@Override | |||
public void draw(HwmfGraphics ctx) { | |||
Rectangle2D bounds = new Rectangle2D.Double(xStart, yStart, 0, 0); | |||
ctx.drawString(text, bounds); | |||
ctx.drawString(getTextBytes(), bounds); | |||
} | |||
public String getText(Charset charset) { | |||
return new String(getTextBytes(), charset); | |||
} | |||
/** | |||
* | |||
* @return a copy of a trimmed byte array of rawTextBytes bytes. | |||
* This includes only the bytes from 0..stringLength. | |||
* This does not include the extra optional padding on the byte array. | |||
*/ | |||
private byte[] getTextBytes() { | |||
byte[] ret = new byte[stringLength]; | |||
System.arraycopy(rawTextBytes, 0, ret, 0, stringLength); | |||
return ret; | |||
} | |||
} | |||
@@ -264,7 +279,7 @@ public class HwmfText { | |||
* the length is odd, an extra byte is placed after it so that the following member (optional Dx) is | |||
* aligned on a 16-bit boundary. | |||
*/ | |||
private String text; | |||
private byte[] rawTextBytes; | |||
/** | |||
* An optional array of 16-bit signed integers that indicate the distance between | |||
* origins of adjacent character cells. For example, Dx[i] logical units separate the origins of | |||
@@ -300,10 +315,9 @@ public class HwmfText { | |||
size += 4*LittleEndianConsts.SHORT_SIZE; | |||
} | |||
byte buf[] = new byte[stringLength+(stringLength&1)]; | |||
leis.readFully(buf); | |||
text = new String(buf, 0, stringLength, LocaleUtil.CHARSET_1252); | |||
size += buf.length; | |||
rawTextBytes = new byte[stringLength+(stringLength&1)]; | |||
leis.readFully(rawTextBytes); | |||
size += rawTextBytes.length; | |||
if (size >= remainingRecordSize) { | |||
logger.log(POILogger.INFO, "META_EXTTEXTOUT doesn't contain character tracking info"); | |||
@@ -327,7 +341,23 @@ public class HwmfText { | |||
@Override | |||
public void draw(HwmfGraphics ctx) { | |||
Rectangle2D bounds = new Rectangle2D.Double(x, y, 0, 0); | |||
ctx.drawString(text, bounds, dx); | |||
ctx.drawString(getTextBytes(), bounds, dx); | |||
} | |||
public String getText(Charset charset) { | |||
return new String(getTextBytes(), charset); | |||
} | |||
/** | |||
* | |||
* @return a copy of a trimmed byte array of rawTextBytes bytes. | |||
* This includes only the bytes from 0..stringLength. | |||
* This does not include the extra optional padding on the byte array. | |||
*/ | |||
private byte[] getTextBytes() { | |||
byte[] ret = new byte[stringLength]; | |||
System.arraycopy(rawTextBytes, 0, ret, 0, stringLength); | |||
return ret; | |||
} | |||
} | |||
@@ -523,5 +553,9 @@ public class HwmfText { | |||
public void applyObject(HwmfGraphics ctx) { | |||
ctx.getProperties().setFont(font); | |||
} | |||
public HwmfFont getFont() { | |||
return font; | |||
} | |||
} | |||
} |
@@ -18,7 +18,9 @@ | |||
package org.apache.poi.hwmf; | |||
import static org.junit.Assert.assertEquals; | |||
import static org.junit.Assert.assertTrue; | |||
import javax.imageio.ImageIO; | |||
import java.awt.Dimension; | |||
import java.awt.Graphics2D; | |||
import java.awt.RenderingHints; | |||
@@ -31,21 +33,24 @@ import java.io.FileOutputStream; | |||
import java.io.FilterInputStream; | |||
import java.io.IOException; | |||
import java.net.URL; | |||
import java.nio.charset.Charset; | |||
import java.util.List; | |||
import java.util.Locale; | |||
import java.util.zip.ZipEntry; | |||
import java.util.zip.ZipInputStream; | |||
import javax.imageio.ImageIO; | |||
import org.apache.poi.POIDataSamples; | |||
import org.apache.poi.hwmf.record.HwmfFill.HwmfImageRecord; | |||
import org.apache.poi.hwmf.record.HwmfFont; | |||
import org.apache.poi.hwmf.record.HwmfRecord; | |||
import org.apache.poi.hwmf.record.HwmfRecordType; | |||
import org.apache.poi.hwmf.record.HwmfText; | |||
import org.apache.poi.hwmf.usermodel.HwmfPicture; | |||
import org.apache.poi.sl.usermodel.PictureData; | |||
import org.apache.poi.sl.usermodel.PictureData.PictureType; | |||
import org.apache.poi.sl.usermodel.SlideShow; | |||
import org.apache.poi.sl.usermodel.SlideShowFactory; | |||
import org.apache.poi.util.LocaleUtil; | |||
import org.apache.poi.util.Units; | |||
import org.junit.Ignore; | |||
import org.junit.Test; | |||
@@ -188,4 +193,33 @@ public class TestHwmfParsing { | |||
} | |||
} | |||
} | |||
@Test | |||
@Ignore("If we decide we can use common crawl file specified, we can turn this back on") | |||
public void testCyrillic() throws Exception { | |||
//TODO: move test file to framework and fix this | |||
File dir = new File("C:/somethingOrOther"); | |||
File f = new File(dir, "ZMLH54SPLI76NQ7XMKVB7SMUJA2HTXTS-2.wmf"); | |||
HwmfPicture wmf = new HwmfPicture(new FileInputStream(f)); | |||
Charset charset = LocaleUtil.CHARSET_1252; | |||
StringBuilder sb = new StringBuilder(); | |||
//this is pure hackery for specifying the font | |||
//this happens to work on this test file, but you need to | |||
//do what Graphics does by maintaining the stack, etc.! | |||
for (HwmfRecord r : wmf.getRecords()) { | |||
if (r.getRecordType().equals(HwmfRecordType.createFontIndirect)) { | |||
HwmfFont font = ((HwmfText.WmfCreateFontIndirect)r).getFont(); | |||
charset = (font.getCharSet().getCharset() == null) ? LocaleUtil.CHARSET_1252 : font.getCharSet().getCharset(); | |||
} | |||
if (r.getRecordType().equals(HwmfRecordType.extTextOut)) { | |||
HwmfText.WmfExtTextOut textOut = (HwmfText.WmfExtTextOut)r; | |||
sb.append(textOut.getText(charset)).append("\n"); | |||
} | |||
} | |||
String txt = sb.toString(); | |||
assertTrue(txt.contains("\u041E\u0431\u0449\u043E")); | |||
assertTrue(txt.contains("\u0411\u0430\u043B\u0430\u043D\u0441")); | |||
} | |||
} |