Search in sources :

Example 1 with HwmfFont

use of org.apache.poi.hwmf.record.HwmfFont in project poi by apache.

the class TestHwmfParsing method testCyrillic.

/**
 * Collects the text of every {@code extTextOut} record of a WMF file, decoding
 * it with the charset of the most recently seen {@code createFontIndirect}
 * record (windows-1252 until a font record supplies one), and checks that the
 * expected Cyrillic words are present.
 *
 * @throws Exception if the file cannot be opened or parsed
 */
@Test
@Ignore("If we decide we can use common crawl file specified, we can turn this back on")
public void testCyrillic() throws Exception {
    //TODO: move test file to framework and fix this
    File dir = new File("C:/somethingOrOther");
    File f = new File(dir, "ZMLH54SPLI76NQ7XMKVB7SMUJA2HTXTS-2.wmf");
    StringBuilder sb = new StringBuilder();
    FileInputStream is = new FileInputStream(f);
    // try/finally so the stream is released even if parsing or decoding throws
    try {
        HwmfPicture wmf = new HwmfPicture(is);
        Charset charset = LocaleUtil.CHARSET_1252;
        //do what Graphics does by maintaining the stack, etc.!
        for (HwmfRecord r : wmf.getRecords()) {
            if (r.getRecordType().equals(HwmfRecordType.createFontIndirect)) {
                HwmfFont font = ((HwmfText.WmfCreateFontIndirect) r).getFont();
                // fall back to windows-1252 when the font carries no usable charset
                charset = (font.getCharSet() == null || font.getCharSet().getCharset() == null)
                        ? LocaleUtil.CHARSET_1252 : font.getCharSet().getCharset();
            }
            if (r.getRecordType().equals(HwmfRecordType.extTextOut)) {
                HwmfText.WmfExtTextOut textOut = (HwmfText.WmfExtTextOut) r;
                sb.append(textOut.getText(charset)).append("\n");
            }
        }
    } finally {
        is.close();
    }
    String txt = sb.toString();
    assertContains(txt, "Общо");
    assertContains(txt, "Баланс");
}
Also used : HwmfRecord(org.apache.poi.hwmf.record.HwmfRecord) Charset(java.nio.charset.Charset) HwmfText(org.apache.poi.hwmf.record.HwmfText) File(java.io.File) FileInputStream(java.io.FileInputStream) HwmfFont(org.apache.poi.hwmf.record.HwmfFont) HwmfPicture(org.apache.poi.hwmf.usermodel.HwmfPicture) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 2 with HwmfFont

use of org.apache.poi.hwmf.record.HwmfFont in project poi by apache.

the class TestHwmfParsing method testShift_JIS.

/**
 * Collects the text of every {@code extTextOut} record of a WMF file, decoding
 * it with the charset of the most recently seen {@code createFontIndirect}
 * record (windows-1252 until a font record supplies one), and checks that the
 * expected Japanese text is present.
 *
 * @throws Exception if the file cannot be opened or parsed
 */
@Test
@Ignore("If we decide we can use the common crawl file attached to Bug 60677, " + "we can turn this back on")
public void testShift_JIS() throws Exception {
    //TODO: move test file to framework and fix this
    File f = new File("C:/data/file8.wmf");
    StringBuilder sb = new StringBuilder();
    FileInputStream is = new FileInputStream(f);
    // try/finally so the stream is released even if parsing or decoding throws
    try {
        HwmfPicture wmf = new HwmfPicture(is);
        Charset charset = LocaleUtil.CHARSET_1252;
        //do what Graphics does by maintaining the stack, etc.!
        for (HwmfRecord r : wmf.getRecords()) {
            if (r.getRecordType().equals(HwmfRecordType.createFontIndirect)) {
                HwmfFont font = ((HwmfText.WmfCreateFontIndirect) r).getFont();
                // fall back to windows-1252 when the font carries no usable charset
                charset = (font.getCharSet() == null || font.getCharSet().getCharset() == null)
                        ? LocaleUtil.CHARSET_1252 : font.getCharSet().getCharset();
            }
            if (r.getRecordType().equals(HwmfRecordType.extTextOut)) {
                HwmfText.WmfExtTextOut textOut = (HwmfText.WmfExtTextOut) r;
                sb.append(textOut.getText(charset)).append("\n");
            }
        }
    } finally {
        is.close();
    }
    String txt = sb.toString();
    assertContains(txt, "航空情報業務へのGIS");
}
Also used : HwmfRecord(org.apache.poi.hwmf.record.HwmfRecord) Charset(java.nio.charset.Charset) HwmfText(org.apache.poi.hwmf.record.HwmfText) File(java.io.File) FileInputStream(java.io.FileInputStream) HwmfFont(org.apache.poi.hwmf.record.HwmfFont) HwmfPicture(org.apache.poi.hwmf.usermodel.HwmfPicture) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 3 with HwmfFont

use of org.apache.poi.hwmf.record.HwmfFont in project poi by apache.

the class HwmfGraphics method drawString.

/**
 * Draws encoded text using the current font, text color and background mode.
 *
 * <p>The bytes are decoded with the font's charset ({@code DEFAULT_CHARSET}
 * when the font provides none). When {@code dx} is given, its entries are
 * applied as per-character tracking. For encodings where the decoded string
 * length differs from the byte length, {@code dx} is first remapped from one
 * entry per byte to one entry per code point.</p>
 *
 * @param text   the encoded text bytes; nothing is drawn if {@code null} or empty
 * @param bounds supplies the drawing origin and, in opaque background mode,
 *               the size of the filled background rectangle
 * @param dx     optional spacing values parallel to {@code text};
 *               may be {@code null} or empty
 */
public void drawString(byte[] text, Rectangle2D bounds, int[] dx) {
    HwmfFont font = prop.getFont();
    if (font == null || text == null || text.length == 0) {
        return;
    }
    double fontH = getFontHeight(font);
    // TODO: another approx. ...
    double fontW = fontH / 1.8;
    // fall back to the default charset when the font carries no usable charset
    Charset charset = (font.getCharSet() == null || font.getCharSet().getCharset() == null)
            ? DEFAULT_CHARSET : font.getCharSet().getCharset();
    String textString = new String(text, charset);
    AttributedString as = new AttributedString(textString);
    if (dx == null || dx.length == 0) {
        addAttributes(as, font);
    } else {
        int[] dxNormed = dx;
        // The decoded length differs from the byte length (multi-byte encoding):
        // pick, for every code point, the dx entry at the position of its first byte.
        if (textString.length() != text.length) {
            int codePoints = textString.codePointCount(0, textString.length());
            dxNormed = new int[codePoints];
            int dxPosition = 0;
            // dxNormed has one slot per code point, so it needs its own index;
            // the char offset runs ahead of it as soon as a surrogate pair occurs
            int cpIndex = 0;
            for (int offset = 0; offset < textString.length(); ) {
                // a dx array shorter than the text leaves the remaining entries at 0
                if (dxPosition < dx.length) {
                    dxNormed[cpIndex] = dx[dxPosition];
                }
                cpIndex++;
                int[] chars = new int[1];
                int cp = textString.codePointAt(offset);
                chars[0] = cp;
                //now figure out how many bytes it takes to encode that
                //code point in the charset
                int byteLength = new String(chars, 0, chars.length).getBytes(charset).length;
                dxPosition += byteLength;
                offset += Character.charCount(cp);
            }
        }
        // The arguments never change between iterations, so apply the font
        // attributes once (assumes addAttributes is idempotent, as repeated
        // identical calls were made before). Guarded so that, as before,
        // nothing is added when there are no dx entries to process.
        if (dxNormed.length > 0) {
            addAttributes(as, font);
        }
        for (int i = 0; i < dxNormed.length; i++) {
            // the additional/suffix width of a char is added as tracking to the next char
            if (i < dxNormed.length - 1) {
                as.addAttribute(TextAttribute.TRACKING, (dxNormed[i] - fontW) / fontH, i + 1, i + 2);
            }
        }
    }
    // escapement is divided by 10 before conversion to radians
    double angle = Math.toRadians(-font.getEscapement() / 10.);
    // remember the transform so translate/rotate below can be undone
    final AffineTransform at = graphicsCtx.getTransform();
    try {
        graphicsCtx.translate(bounds.getX(), bounds.getY() + fontH);
        graphicsCtx.rotate(angle);
        if (prop.getBkMode() == HwmfBkMode.OPAQUE) {
            // TODO: validate bounds
            graphicsCtx.setBackground(prop.getBackgroundColor().getColor());
            graphicsCtx.fill(new Rectangle2D.Double(0, 0, bounds.getWidth(), bounds.getHeight()));
        }
        graphicsCtx.setColor(prop.getTextColor().getColor());
        // (float)bounds.getX(), (float)bounds.getY());
        graphicsCtx.drawString(as.getIterator(), 0, 0);
    } finally {
        graphicsCtx.setTransform(at);
    }
}
Also used : AttributedString(java.text.AttributedString) Rectangle2D(java.awt.geom.Rectangle2D) Charset(java.nio.charset.Charset) AffineTransform(java.awt.geom.AffineTransform) AttributedString(java.text.AttributedString) HwmfFont(org.apache.poi.hwmf.record.HwmfFont) TexturePaint(java.awt.TexturePaint) Paint(java.awt.Paint)

Example 4 with HwmfFont

use of org.apache.poi.hwmf.record.HwmfFont in project tika by apache.

the class WMFParser method parse.

/**
 * Extracts the text of a WMF picture and emits each {@code extTextOut} /
 * {@code textOut} record as its own {@code <p>} element.
 *
 * <p>Text is decoded with the charset of the most recently seen
 * {@code createFontIndirect} record; windows-1252 is used until a font record
 * supplies a charset.</p>
 *
 * @param stream   the WMF data to parse
 * @param handler  receives the generated XHTML SAX events
 * @param metadata passed on to the XHTML content handler
 * @param context  the parse context (not used by this method)
 * @throws IOException   declared by the parser interface
 * @throws SAXException  if the content handler fails
 * @throws TikaException if POI throws a {@code RecordFormatException}, any other
 *                       {@code RuntimeException} or an {@code AssertionError}
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    try {
        HwmfPicture picture = new HwmfPicture(stream);
        // Declared outside the loop: the charset selected by a font record must
        // stay in effect for the text records that follow it.
        Charset charset = LocaleUtil.CHARSET_1252;
        //to determine when to keep two text parts on the same line
        for (HwmfRecord record : picture.getRecords()) {
            //This fix should be done within POI
            if (record.getRecordType().equals(HwmfRecordType.createFontIndirect)) {
                HwmfFont font = ((HwmfText.WmfCreateFontIndirect) record).getFont();
                charset = (font.getCharSet() == null || font.getCharSet().getCharset() == null) ? LocaleUtil.CHARSET_1252 : font.getCharSet().getCharset();
            }
            if (record.getRecordType().equals(HwmfRecordType.extTextOut)) {
                HwmfText.WmfExtTextOut textOut = (HwmfText.WmfExtTextOut) record;
                xhtml.startElement("p");
                xhtml.characters(textOut.getText(charset));
                xhtml.endElement("p");
            } else if (record.getRecordType().equals(HwmfRecordType.textOut)) {
                HwmfText.WmfTextOut textOut = (HwmfText.WmfTextOut) record;
                xhtml.startElement("p");
                xhtml.characters(textOut.getText(charset));
                xhtml.endElement("p");
            }
        }
    } catch (RecordFormatException e) {
        //POI's hwmfparser can throw these for "parse exceptions"
        throw new TikaException(e.getMessage(), e);
    } catch (RuntimeException e) {
        //wrap any other RuntimeException in a TikaException
        throw new TikaException(e.getMessage(), e);
    } catch (AssertionError e) {
        //POI's hwmfparser can throw these for parse exceptions
        throw new TikaException(e.getMessage(), e);
    }
    xhtml.endDocument();
}
Also used : TikaException(org.apache.tika.exception.TikaException) HwmfRecord(org.apache.poi.hwmf.record.HwmfRecord) Charset(java.nio.charset.Charset) HwmfText(org.apache.poi.hwmf.record.HwmfText) XHTMLContentHandler(org.apache.tika.sax.XHTMLContentHandler) HwmfFont(org.apache.poi.hwmf.record.HwmfFont) HwmfPicture(org.apache.poi.hwmf.usermodel.HwmfPicture) RecordFormatException(org.apache.poi.util.RecordFormatException)

Aggregations

Charset (java.nio.charset.Charset)4 HwmfFont (org.apache.poi.hwmf.record.HwmfFont)4 HwmfRecord (org.apache.poi.hwmf.record.HwmfRecord)3 HwmfText (org.apache.poi.hwmf.record.HwmfText)3 HwmfPicture (org.apache.poi.hwmf.usermodel.HwmfPicture)3 File (java.io.File)2 FileInputStream (java.io.FileInputStream)2 Ignore (org.junit.Ignore)2 Test (org.junit.Test)2 Paint (java.awt.Paint)1 TexturePaint (java.awt.TexturePaint)1 AffineTransform (java.awt.geom.AffineTransform)1 Rectangle2D (java.awt.geom.Rectangle2D)1 AttributedString (java.text.AttributedString)1 RecordFormatException (org.apache.poi.util.RecordFormatException)1 TikaException (org.apache.tika.exception.TikaException)1 XHTMLContentHandler (org.apache.tika.sax.XHTMLContentHandler)1