Use of org.apache.tika.parser.microsoft.OfficeParser in the Apache Tika project.
From class OOXMLParserTest, method testExcelXLSB.
/**
 * Exercises the XLSB test workbook end to end: the default detector must
 * report the binary-workbook media type, OfficeParser must not list that
 * type as supported while OOXMLParser must, and AutoDetectParser must
 * extract the expected sample text from the file.
 */
@Test
public void testExcelXLSB() throws Exception {
    final String resource = "/test-documents/testEXCEL.xlsb";
    Detector typeDetector = new DefaultDetector();
    AutoDetectParser autoParser = new AutoDetectParser();

    Metadata metadata = new Metadata();
    metadata.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");

    // Step 1: detection has to yield the XLSB media type
    MediaType detected;
    try (InputStream stream = ExcelParserTest.class.getResourceAsStream(resource)) {
        detected = typeDetector.detect(stream, metadata);
        assertEquals(
                "application/vnd.ms-excel.sheet.binary.macroenabled.12",
                detected.toString());
    }

    // Step 2: the OLE2-based OfficeParser does not claim this type...
    boolean claimedByOfficeParser =
            new OfficeParser().getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(false, claimedByOfficeParser);

    // ...whereas the OOXMLParser does
    boolean claimedByOoxmlParser =
            new OOXMLParser().getSupportedTypes(new ParseContext()).contains(detected);
    assertTrue(claimedByOoxmlParser);

    // Step 3: a full auto-detected parse completes and yields the sample text
    try (InputStream stream = ExcelParserTest.class.getResourceAsStream(resource)) {
        ContentHandler textHandler = new BodyContentHandler(-1);
        ParseContext parseContext = new ParseContext();
        parseContext.set(Locale.class, Locale.US);

        autoParser.parse(stream, textHandler, metadata, parseContext);

        String extracted = textHandler.toString();
        assertContains("This is an example spreadsheet", extracted);
    }
}
Use of org.apache.tika.parser.microsoft.OfficeParser in the Apache Tika project.
From class SolidworksParserTest, method testPart2014SP0Parser.
/**
 * Parses a SolidWorks part file saved by version 2014 SP0 with
 * {@link OfficeParser} and verifies the reported content type and the
 * extracted metadata properties.
 */
@Test
public void testPart2014SP0Parser() throws Exception {
    final String resource = "/test-documents/testsolidworksPart2014SP0.SLDPRT";
    try (InputStream stream = SolidworksParserTest.class.getResourceAsStream(resource)) {
        ContentHandler body = new BodyContentHandler();
        Metadata meta = new Metadata();
        OfficeParser officeParser = new OfficeParser();
        officeParser.parse(stream, body, meta, new ParseContext());

        // Content type
        assertEquals("application/sldworks", meta.get(Metadata.CONTENT_TYPE));

        // Creation / modification properties
        assertEquals("2012-04-18T10:27:29Z", meta.get(TikaCoreProperties.CREATED));
        assertEquals(null, meta.get(TikaCoreProperties.CONTRIBUTOR));
        assertEquals("2013-11-28T12:38:28Z", meta.get(Metadata.MODIFIED));
        assertEquals("solidworks-dcom_dev", meta.get(TikaCoreProperties.MODIFIER));

        // Properties expected to be absent
        assertEquals(null, meta.get(TikaCoreProperties.RELATION));
        assertEquals(null, meta.get(TikaCoreProperties.RIGHTS));
        assertEquals(null, meta.get(TikaCoreProperties.SOURCE));

        // Properties expected to be present but empty
        assertEquals("", meta.get(TikaCoreProperties.TITLE));
        assertEquals("", meta.get(TikaCoreProperties.KEYWORDS));
    }
}
Use of org.apache.tika.parser.microsoft.OfficeParser in the Apache Tika project.
From class SolidworksParserTest, method testPart2013SP2Parser.
/**
 * Parses a SolidWorks part file saved by version 2013 SP2 with
 * {@link OfficeParser} and verifies the reported content type and the
 * extracted metadata properties.
 */
@Test
public void testPart2013SP2Parser() throws Exception {
    final String resource = "/test-documents/testsolidworksPart2013SP2.SLDPRT";
    try (InputStream stream = SolidworksParserTest.class.getResourceAsStream(resource)) {
        ContentHandler body = new BodyContentHandler();
        Metadata meta = new Metadata();
        OfficeParser officeParser = new OfficeParser();
        officeParser.parse(stream, body, meta, new ParseContext());

        // Content type
        assertEquals("application/sldworks", meta.get(Metadata.CONTENT_TYPE));

        // Creation / modification properties
        assertEquals("2012-04-18T10:27:29Z", meta.get(TikaCoreProperties.CREATED));
        assertEquals(null, meta.get(TikaCoreProperties.CONTRIBUTOR));
        assertEquals("2013-09-06T08:12:12Z", meta.get(Metadata.MODIFIED));
        assertEquals("solidworks-dcom_dev", meta.get(TikaCoreProperties.MODIFIER));

        // Properties expected to be absent
        assertEquals(null, meta.get(TikaCoreProperties.RELATION));
        assertEquals(null, meta.get(TikaCoreProperties.RIGHTS));
        assertEquals(null, meta.get(TikaCoreProperties.SOURCE));

        // Properties expected to be present but empty
        assertEquals("", meta.get(TikaCoreProperties.TITLE));
        assertEquals("", meta.get(TikaCoreProperties.KEYWORDS));
    }
}
Use of org.apache.tika.parser.microsoft.OfficeParser in the Apache Tika project.
From class SolidworksParserTest, method testDrawing2013SP2Parser.
/**
 * Parses a SolidWorks drawing file saved by version 2013 SP2 with
 * {@link OfficeParser} and verifies the reported content type and the
 * extracted metadata properties.
 */
@Test
public void testDrawing2013SP2Parser() throws Exception {
    final String resource = "/test-documents/testsolidworksDrawing2013SP2.SLDDRW";
    try (InputStream stream = SolidworksParserTest.class.getResourceAsStream(resource)) {
        ContentHandler body = new BodyContentHandler();
        Metadata meta = new Metadata();
        OfficeParser officeParser = new OfficeParser();
        officeParser.parse(stream, body, meta, new ParseContext());

        // Content type
        assertEquals("application/sldworks", meta.get(Metadata.CONTENT_TYPE));

        // Creation / modification properties
        assertEquals("2012-07-03T12:05:29Z", meta.get(TikaCoreProperties.CREATED));
        assertEquals(null, meta.get(TikaCoreProperties.CONTRIBUTOR));
        assertEquals("2013-09-06T08:06:57Z", meta.get(Metadata.MODIFIED));
        assertEquals("solidworks-dcom_dev", meta.get(TikaCoreProperties.MODIFIER));

        // Properties expected to be absent
        assertEquals(null, meta.get(TikaCoreProperties.RELATION));
        assertEquals(null, meta.get(TikaCoreProperties.RIGHTS));
        assertEquals(null, meta.get(TikaCoreProperties.SOURCE));

        // Properties expected to be present but empty
        assertEquals("", meta.get(TikaCoreProperties.TITLE));
        assertEquals("", meta.get(TikaCoreProperties.KEYWORDS));
    }
}
Use of org.apache.tika.parser.microsoft.OfficeParser in the Apache Tika project.
From class SolidworksParserTest, method testDrawing2014SP0Parser.
/**
 * Parses a SolidWorks drawing file saved by version 2014 SP0 with
 * {@link OfficeParser} and verifies the reported content type and the
 * extracted metadata properties.
 */
@Test
public void testDrawing2014SP0Parser() throws Exception {
    final String resource = "/test-documents/testsolidworksDrawing2014SP0.SLDDRW";
    try (InputStream stream = SolidworksParserTest.class.getResourceAsStream(resource)) {
        ContentHandler body = new BodyContentHandler();
        Metadata meta = new Metadata();
        OfficeParser officeParser = new OfficeParser();
        officeParser.parse(stream, body, meta, new ParseContext());

        // Content type
        assertEquals("application/sldworks", meta.get(Metadata.CONTENT_TYPE));

        // Creation / modification properties
        assertEquals("2012-07-03T12:05:29Z", meta.get(TikaCoreProperties.CREATED));
        assertEquals(null, meta.get(TikaCoreProperties.CONTRIBUTOR));
        assertEquals("2013-11-28T12:41:49Z", meta.get(Metadata.MODIFIED));
        assertEquals("solidworks-dcom_dev", meta.get(TikaCoreProperties.MODIFIER));

        // Properties expected to be absent
        assertEquals(null, meta.get(TikaCoreProperties.RELATION));
        assertEquals(null, meta.get(TikaCoreProperties.RIGHTS));
        assertEquals(null, meta.get(TikaCoreProperties.SOURCE));

        // Properties expected to be present but empty
        assertEquals("", meta.get(TikaCoreProperties.TITLE));
        assertEquals("", meta.get(TikaCoreProperties.KEYWORDS));
    }
}
Aggregations