use of org.apache.poi.hssf.extractor.ExcelExtractor in project poi by apache.
the class TestBugs method bug51535.
/**
* Large row numbers and NPOIFS vs POIFS
*/
@SuppressWarnings("resource")
@Test
public void bug51535() throws Exception {
byte[] data = HSSFITestDataProvider.instance.getTestDataFileContent("51535.xls");
HSSFWorkbook wbPOIFS = new HSSFWorkbook(new POIFSFileSystem(new ByteArrayInputStream(data)).getRoot(), false);
HSSFWorkbook wbNPOIFS = new HSSFWorkbook(new NPOIFSFileSystem(new ByteArrayInputStream(data)).getRoot(), false);
for (HSSFWorkbook wb : new HSSFWorkbook[] { wbPOIFS, wbNPOIFS }) {
assertEquals(3, wb.getNumberOfSheets());
// Check directly
HSSFSheet s = wb.getSheetAt(0);
assertEquals("Top Left Cell", s.getRow(0).getCell(0).getStringCellValue());
assertEquals("Top Right Cell", s.getRow(0).getCell(255).getStringCellValue());
assertEquals("Bottom Left Cell", s.getRow(65535).getCell(0).getStringCellValue());
assertEquals("Bottom Right Cell", s.getRow(65535).getCell(255).getStringCellValue());
// Extract and check
ExcelExtractor ex = new ExcelExtractor(wb);
String text = ex.getText();
assertContains(text, "Top Left Cell");
assertContains(text, "Top Right Cell");
assertContains(text, "Bottom Left Cell");
assertContains(text, "Bottom Right Cell");
ex.close();
}
}
use of org.apache.poi.hssf.extractor.ExcelExtractor in project poi by apache.
the class TestXSSFEventBasedExcelExtractor method testComparedToOLE2.
/**
* Test that we return pretty much the same as
* ExcelExtractor does, when we're both passed
* the same file, just saved as xls and xlsx
*/
@Test
public void testComparedToOLE2() throws Exception {
// A fairly simple file - ooxml
XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("SampleSS.xlsx");
ExcelExtractor ole2Extractor = new ExcelExtractor(HSSFTestDataSamples.openSampleWorkbook("SampleSS.xls"));
POITextExtractor[] extractors = new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
for (POITextExtractor extractor : extractors) {
String text = extractor.getText().replaceAll("[\r\t]", "");
assertStartsWith(text, "First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n");
Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
Matcher m = pattern.matcher(text);
assertTrue(m.matches());
}
ole2Extractor.close();
ooxmlExtractor.close();
}
use of org.apache.poi.hssf.extractor.ExcelExtractor in project poi by apache.
the class TestXSSFExcelExtractor method testComparedToOLE2.
/**
* Test that we return pretty much the same as
* ExcelExtractor does, when we're both passed
* the same file, just saved as xls and xlsx
*/
public void testComparedToOLE2() throws IOException {
// A fairly simple file - ooxml
XSSFExcelExtractor ooxmlExtractor = getExtractor("SampleSS.xlsx");
ExcelExtractor ole2Extractor = new ExcelExtractor(HSSFTestDataSamples.openSampleWorkbook("SampleSS.xls"));
Map<String, POITextExtractor> extractors = new HashMap<String, POITextExtractor>();
extractors.put("SampleSS.xlsx", ooxmlExtractor);
extractors.put("SampleSS.xls", ole2Extractor);
for (final Entry<String, POITextExtractor> e : extractors.entrySet()) {
String filename = e.getKey();
POITextExtractor extractor = e.getValue();
String text = extractor.getText().replaceAll("[\r\t]", "");
assertStartsWith(filename, text, "First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n");
Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
Matcher m = pattern.matcher(text);
assertTrue(filename, m.matches());
}
ole2Extractor.close();
ooxmlExtractor.close();
}
use of org.apache.poi.hssf.extractor.ExcelExtractor in project poi by apache.
the class TestExtractorFactory method testPreferEventBased.
@Test
public void testPreferEventBased() throws Exception {
assertFalse(ExtractorFactory.getPreferEventExtractor());
assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
ExtractorFactory.setThreadPrefersEventExtractors(true);
assertTrue(ExtractorFactory.getPreferEventExtractor());
assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
ExtractorFactory.setAllThreadsPreferEventExtractors(false);
assertFalse(ExtractorFactory.getPreferEventExtractor());
assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
assertEquals(Boolean.FALSE, ExtractorFactory.getAllThreadsPreferEventExtractors());
ExtractorFactory.setAllThreadsPreferEventExtractors(null);
assertTrue(ExtractorFactory.getPreferEventExtractor());
assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
// Check we get the right extractors now
POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
assertTrue(extractor instanceof EventBasedExcelExtractor);
extractor.close();
extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
assertTrue(extractor.getText().length() > 200);
extractor.close();
extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
assertTrue(extractor instanceof XSSFEventBasedExcelExtractor);
extractor.close();
extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
assertTrue(extractor.getText().length() > 200);
extractor.close();
// Put back to normal
ExtractorFactory.setThreadPrefersEventExtractors(false);
assertFalse(ExtractorFactory.getPreferEventExtractor());
assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
// And back
extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
assertTrue(extractor instanceof ExcelExtractor);
extractor.close();
extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
assertTrue(extractor.getText().length() > 200);
extractor.close();
extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
assertTrue(extractor instanceof XSSFExcelExtractor);
extractor.close();
extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
assertTrue(extractor.getText().length() > 200);
extractor.close();
}
use of org.apache.poi.hssf.extractor.ExcelExtractor in project poi by apache.
the class TestExtractorFactory method testPOIFS.
@Test
public void testPOIFS() throws Exception {
// Excel
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))) instanceof ExcelExtractor);
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))).getText().length() > 200);
// Word
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))) instanceof WordExtractor);
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))).getText().length() > 120);
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc6))) instanceof Word6Extractor);
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc6))).getText().length() > 20);
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc95))) instanceof Word6Extractor);
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc95))).getText().length() > 120);
// PowerPoint
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))) instanceof PowerPointExtractor);
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120);
// Visio
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))) instanceof VisioTextExtractor);
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50);
// Publisher
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub))) instanceof PublisherTextExtractor);
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub))).getText().length() > 50);
// Outlook msg
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg))) instanceof OutlookTextExtactor);
assertTrue(ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg))).getText().length() > 50);
// Text
try {
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(txt)));
fail();
} catch (IOException e) {
// Good
}
}
Aggregations