Search in sources :

Example 1 with HemfRecord

use of org.apache.poi.hemf.record.HemfRecord in project poi by apache.

the class HemfExtractorTest method testBasicMac.

/**
 * Iterates over the records of {@code SimpleEMF_mac.emf} and, for every public
 * MultiFormats comment found, checks that each embedded payload starts with
 * {@code %PDF} and ends with {@code EOF\n}.
 *
 * <p>Also asserts that at least one such payload was seen and that the number of
 * iterated records is one less than the count reported by the header
 * (presumably because the header itself is counted as a record; confirm against
 * {@code HemfExtractor}).
 *
 * @throws Exception if the sample cannot be opened or parsed
 */
@Test
public void testBasicMac() throws Exception {
    InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("SimpleEMF_mac.emf");
    try {
        HemfExtractor ex = new HemfExtractor(is);
        HemfHeader header = ex.getHeader();
        int records = 0;
        boolean extractedData = false;
        for (HemfRecord record : ex) {
            if (record.getRecordType() == HemfRecordType.comment) {
                AbstractHemfComment comment = ((HemfCommentRecord) record).getComment();
                if (comment instanceof HemfCommentPublic.MultiFormats) {
                    for (HemfCommentPublic.HemfMultiFormatsData d : ((HemfCommentPublic.MultiFormats) comment).getData()) {
                        byte[] data = d.getData();
                        // the payload must begin with the "%PDF" magic at offset 0
                        assertEquals('%', data[0]);
                        assertEquals('P', data[1]);
                        assertEquals('D', data[2]);
                        assertEquals('F', data[3]);
                        // the payload must end exactly at "EOF\n" (no trailing bytes)
                        assertEquals('E', data[data.length - 4]);
                        assertEquals('O', data[data.length - 3]);
                        assertEquals('F', data[data.length - 2]);
                        assertEquals('\n', data[data.length - 1]);
                        extractedData = true;
                    }
                }
            }
            records++;
        }
        assertTrue(extractedData);
        assertEquals(header.getRecords() - 1, records);
    } finally {
        // release the sample stream even when an assertion or parse error is thrown
        is.close();
    }
}
Also used : InputStream(java.io.InputStream) AbstractHemfComment(org.apache.poi.hemf.record.AbstractHemfComment) HemfCommentPublic(org.apache.poi.hemf.record.HemfCommentPublic) HemfRecord(org.apache.poi.hemf.record.HemfRecord) HemfCommentRecord(org.apache.poi.hemf.record.HemfCommentRecord) HemfHeader(org.apache.poi.hemf.record.HemfHeader) Test(org.junit.Test)

Example 2 with HemfRecord

use of org.apache.poi.hemf.record.HemfRecord in project poi by apache.

the class HemfExtractorTest method testWindowsText.

/**
 * Rebuilds the text of {@code SimpleEMF_windows.emf} from its ExtTextOutW
 * records and checks that expected fragments are present.
 *
 * <p>A newline is inserted whenever the Y coordinate changes between
 * consecutive text records, and a space whenever the X coordinate advances by
 * more than a fixed threshold. Each string in {@code expectedParts} that
 * exactly matches a record's text is counted, and the count must equal the
 * size of the set.
 *
 * @throws Exception if the sample cannot be opened or parsed
 */
@Test
public void testWindowsText() throws Exception {
    InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("SimpleEMF_windows.emf");
    try {
        HemfExtractor ex = new HemfExtractor(is);
        long lastY = -1;
        long lastX = -1;
        // hard-coded gap threshold; ideally derived from the font or frame/bounds information
        long fudgeFactorX = 1000;
        StringBuilder sb = new StringBuilder();
        Set<String> expectedParts = new HashSet<String>();
        expectedParts.add("C:\\Users\\tallison\\");
        expectedParts.add("testPDF.pdf");
        int foundExpected = 0;
        for (HemfRecord record : ex) {
            if (record.getRecordType().equals(HemfRecordType.exttextoutw)) {
                HemfText.ExtTextOutW extTextOutW = (HemfText.ExtTextOutW) record;
                // a change in Y starts a new line and resets horizontal tracking
                if (lastY > -1 && lastY != extTextOutW.getY()) {
                    sb.append("\n");
                    lastX = -1;
                }
                // a large horizontal jump on the same line is rendered as a space
                if (lastX > -1 && extTextOutW.getX() - lastX > fudgeFactorX) {
                    sb.append(" ");
                }
                String recordText = extTextOutW.getText();
                if (expectedParts.contains(recordText)) {
                    foundExpected++;
                }
                sb.append(recordText);
                lastY = extTextOutW.getY();
                lastX = extTextOutW.getX();
            }
        }
        String fullText = sb.toString();
        assertContains(fullText, "C:\\Users\\tallison\\\n");
        assertContains(fullText, "asf2-git-1.x\\tika-\n");
        assertEquals(expectedParts.size(), foundExpected);
    } finally {
        // release the sample stream even when an assertion or parse error is thrown
        is.close();
    }
}
Also used : InputStream(java.io.InputStream) HemfRecord(org.apache.poi.hemf.record.HemfRecord) HemfText(org.apache.poi.hemf.record.HemfText) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 3 with HemfRecord

use of org.apache.poi.hemf.record.HemfRecord in project poi by apache.

the class HemfExtractorTest method testBasicWindows.

/**
 * Checks the header values of {@code SimpleEMF_windows.emf} (byte count,
 * record count, handle count and the micrometer dimensions) and asserts that
 * iterating the extractor yields one record fewer than the header reports.
 *
 * @throws Exception if the sample cannot be opened or parsed
 */
@Test
public void testBasicWindows() throws Exception {
    InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("SimpleEMF_windows.emf");
    try {
        HemfExtractor ex = new HemfExtractor(is);
        HemfHeader header = ex.getHeader();
        assertEquals(27864, header.getBytes());
        assertEquals(31, header.getRecords());
        assertEquals(3, header.getHandles());
        assertEquals(346000, header.getMicrometersX());
        assertEquals(194000, header.getMicrometersY());
        // walk every record purely to count them
        int records = 0;
        for (HemfRecord record : ex) {
            records++;
        }
        assertEquals(header.getRecords() - 1, records);
    } finally {
        // release the sample stream even when an assertion or parse error is thrown
        is.close();
    }
}
Also used : InputStream(java.io.InputStream) HemfRecord(org.apache.poi.hemf.record.HemfRecord) HemfHeader(org.apache.poi.hemf.record.HemfHeader) Test(org.junit.Test)

Example 4 with HemfRecord

use of org.apache.poi.hemf.record.HemfRecord in project poi by apache.

the class HemfPlusExtractorTest method getCommentRecord.

/**
 * Opens the named sample file and returns the EMF+ comment carried by the
 * record at the given position in iteration order.
 *
 * @param testFileName name of the sample resource to open
 * @param recordIndex  zero-based position of the wanted record among those
 *                     yielded by iterating the extractor
 * @return the EMF+ comment of that record, or {@code null} if iteration ends
 *         before {@code recordIndex} is reached
 * @throws ClassCastException if the record at that position is not a
 *         {@code HemfCommentRecord} or its comment is not a {@code HemfCommentEMFPlus}
 * @throws Exception if the sample cannot be opened or parsed
 */
private HemfCommentEMFPlus getCommentRecord(String testFileName, int recordIndex) throws Exception {
    InputStream is = null;
    try {
        is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream(testFileName);
        HemfExtractor ex = new HemfExtractor(is);
        int i = 0;
        for (HemfRecord record : ex) {
            if (i == recordIndex) {
                HemfCommentRecord commentRecord = (HemfCommentRecord) record;
                return (HemfCommentEMFPlus) commentRecord.getComment();
            }
            i++;
        }
        // index was never reached
        return null;
    } finally {
        // guard against a failed open: closing a null stream would throw a
        // NullPointerException that hides the original failure
        if (is != null) {
            is.close();
        }
    }
}
Also used : InputStream(java.io.InputStream) HemfRecord(org.apache.poi.hemf.record.HemfRecord) HemfCommentRecord(org.apache.poi.hemf.record.HemfCommentRecord) HemfExtractor(org.apache.poi.hemf.extractor.HemfExtractor) HemfCommentEMFPlus(org.apache.poi.hemf.record.HemfCommentEMFPlus)

Example 5 with HemfRecord

use of org.apache.poi.hemf.record.HemfRecord in project poi by apache.

the class HemfExtractorTest method testMacText.

/**
 * Rebuilds the text of {@code SimpleEMF_mac.emf} from its ExtTextOutW records
 * and checks that expected fragments are present.
 *
 * <p>A newline is inserted whenever the Y coordinate changes between
 * consecutive text records, and a space whenever the X coordinate advances by
 * more than a fixed threshold.
 *
 * @throws Exception if the sample cannot be opened or parsed
 */
@Test
public void testMacText() throws Exception {
    InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("SimpleEMF_mac.emf");
    try {
        HemfExtractor ex = new HemfExtractor(is);
        long lastY = -1;
        long lastX = -1;
        // hard-coded gap threshold; ideally derived from the font information
        long fudgeFactorX = 1000;
        StringBuilder sb = new StringBuilder();
        for (HemfRecord record : ex) {
            if (record.getRecordType().equals(HemfRecordType.exttextoutw)) {
                HemfText.ExtTextOutW extTextOutW = (HemfText.ExtTextOutW) record;
                // a change in Y starts a new line and resets horizontal tracking
                if (lastY > -1 && lastY != extTextOutW.getY()) {
                    sb.append("\n");
                    lastX = -1;
                }
                // a large horizontal jump on the same line is rendered as a space
                if (lastX > -1 && extTextOutW.getX() - lastX > fudgeFactorX) {
                    sb.append(" ");
                }
                sb.append(extTextOutW.getText());
                lastY = extTextOutW.getY();
                lastX = extTextOutW.getX();
            }
        }
        String txt = sb.toString();
        assertContains(txt, "Tika http://incubator.apache.org");
        assertContains(txt, "Latest News\n");
    } finally {
        // release the sample stream even when an assertion or parse error is thrown
        is.close();
    }
}
Also used : InputStream(java.io.InputStream) HemfRecord(org.apache.poi.hemf.record.HemfRecord) HemfText(org.apache.poi.hemf.record.HemfText) Test(org.junit.Test)

Aggregations

HemfRecord (org.apache.poi.hemf.record.HemfRecord)6 InputStream (java.io.InputStream)5 Test (org.junit.Test)4 HemfCommentRecord (org.apache.poi.hemf.record.HemfCommentRecord)3 HemfText (org.apache.poi.hemf.record.HemfText)3 HemfExtractor (org.apache.poi.hemf.extractor.HemfExtractor)2 AbstractHemfComment (org.apache.poi.hemf.record.AbstractHemfComment)2 HemfCommentPublic (org.apache.poi.hemf.record.HemfCommentPublic)2 HemfHeader (org.apache.poi.hemf.record.HemfHeader)2 HashSet (java.util.HashSet)1 HemfCommentEMFPlus (org.apache.poi.hemf.record.HemfCommentEMFPlus)1 RecordFormatException (org.apache.poi.util.RecordFormatException)1 TikaException (org.apache.tika.exception.TikaException)1 EmbeddedDocumentExtractor (org.apache.tika.extractor.EmbeddedDocumentExtractor)1 XHTMLContentHandler (org.apache.tika.sax.XHTMLContentHandler)1