use of org.apache.tika.metadata.Metadata in project tika by apache.
the class TestParsingExample method testRecursiveParserWrapperExample.
/**
 * Checks the list of {@code Metadata} objects returned by
 * {@code parsingExample.recursiveParserWrapperExample()}: its total size, and the
 * embedded-resource path and extracted content recorded on the entry at index 6.
 */
@Test
public void testRecursiveParserWrapperExample() throws IOException, SAXException, TikaException {
    List<Metadata> parsedDocuments = parsingExample.recursiveParserWrapperExample();
    int documentCount = parsedDocuments.size();
    assertEquals("Number of embedded documents + 1 for the container document", 12, documentCount);

    // Entry 6 is expected to be embed3.txt, the text file nested inside the outer .docx.
    Metadata embeddedText = parsedDocuments.get(6);
    String resourcePath = embeddedText.get("X-TIKA:embedded_resource_path");
    assertEquals("/embed1.zip/embed2.zip/embed3.zip/embed3.txt", resourcePath);

    // The extracted content of that entry is stored under the X-TIKA:content key
    // (per the original note it is HTML-encoded).
    String extractedContent = embeddedText.get("X-TIKA:content");
    assertContains("When in the Course", extractedContent);
}
use of org.apache.tika.metadata.Metadata in project tika by apache.
the class ChmParser method parsePage.
/**
 * Runs {@code htmlParser} over the bytes of a single page and forwards the resulting
 * SAX events to {@code xhtml} through an {@code EmbeddedContentHandler} wrapping a
 * {@code BodyContentHandler}.
 *
 * @param byteObject raw bytes of the page to parse
 * @param htmlParser parser used to interpret the bytes
 * @param xhtml      handler that ultimately receives the page content
 * @param context    parse context handed through to {@code htmlParser}
 * @throws TikaException    if {@code htmlParser} reports a parsing failure
 * @throws RuntimeException wrapping any {@code SAXException} raised while parsing
 */
private void parsePage(byte[] byteObject, Parser htmlParser, ContentHandler xhtml, ParseContext context) throws TikaException {
    // Throwaway metadata: nothing the HTML parser records here is read back afterwards.
    Metadata metadata = new Metadata();
    ContentHandler handler = new EmbeddedContentHandler(new BodyContentHandler(xhtml));
    // try-with-resources scopes the stream to the parse call and guarantees it is closed.
    try (InputStream stream = new ByteArrayInputStream(byteObject)) {
        htmlParser.parse(stream, handler, metadata, context);
    } catch (SAXException e) {
        throw new RuntimeException(e);
    } catch (IOException ignored) {
        // Deliberately best-effort: an IOException here is a pushback overflow from
        // tagsoup, so the page is skipped instead of failing the whole parse.
    }
}
use of org.apache.tika.metadata.Metadata in project tika by apache.
the class TSDParser method parse.
/**
 * Parses a TSD file in two passes over a re-readable copy of {@code stream}: first the
 * TSD metas are extracted and recorded, then the stream is rewound and the embedded
 * content is parsed.
 *
 * @param stream   input to parse; wrapped in a {@code RereadableInputStream}
 * @param handler  receives the XHTML document events and the embedded content
 * @param metadata caller-supplied metadata; used for the XHTML handler and, when it is
 *                 null or empty, as the target for the TSD metas
 * @param context  parse context passed on to the embedded-content parse
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    // First pass: read the TSD metas.
    try (RereadableInputStream rereadable = new RereadableInputStream(stream, 2048, true, true)) {
        Metadata embeddedMetadata = new Metadata();
        List<TSDMetas> extractedMetas = this.extractMetas(rereadable);

        // If the caller's metadata already holds entries, write the TSD metas into the
        // fresh object; otherwise write them into the caller's object as given.
        Metadata metasTarget;
        if (metadata != null && metadata.size() > 0) {
            metasTarget = embeddedMetadata;
        } else {
            metasTarget = metadata;
        }
        this.buildMetas(extractedMetas, metasTarget);

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // Second pass: rewind and parse the file embedded in the TSD file.
        rereadable.rewind();
        this.parseTSDContent(rereadable, handler, embeddedMetadata, context);

        xhtml.endDocument();
    }
}
use of org.apache.tika.metadata.Metadata in project tika by apache.
the class TIAParsingExample method parseURLStream.
/**
 * Opens the resource at {@code address}, decompresses it as gzip and parses the result
 * with an {@code AutoDetectParser}.
 *
 * @param address URL of the gzip-compressed resource to parse
 * @throws Exception if the URL is malformed, the stream cannot be opened or
 *                   decompressed, or parsing fails
 */
public static void parseURLStream(String address) throws Exception {
    Parser parser = new AutoDetectParser();
    ContentHandler handler = new DefaultHandler();
    Metadata metadata = new Metadata();
    ParseContext context = new ParseContext();
    // The raw URL stream is its own resource so it is still closed when the
    // GZIPInputStream constructor throws (e.g. the content is not gzip data);
    // nesting the constructors would leak the connection in that case.
    try (InputStream rawStream = new URL(address).openStream();
         InputStream stream = new GZIPInputStream(rawStream)) {
        parser.parse(stream, handler, metadata, context);
    }
}
use of org.apache.tika.metadata.Metadata in project tika by apache.
the class TIAParsingExample method useAutoDetectParser.
/**
 * Minimal demonstration of calling an {@code AutoDetectParser}: parses an empty
 * in-memory stream with a freshly created handler, metadata and parse context.
 *
 * @throws Exception if the parse call fails
 */
public static void useAutoDetectParser() throws Exception {
    // Zero-length input: the example shows the call sequence, not real content.
    byte[] noBytes = new byte[0];
    InputStream emptyInput = new ByteArrayInputStream(noBytes);
    ContentHandler saxHandler = new DefaultHandler();
    Metadata parsedMetadata = new Metadata();
    ParseContext parseContext = new ParseContext();
    Parser autoDetectParser = new AutoDetectParser();
    autoDetectParser.parse(emptyInput, saxHandler, parsedMetadata, parseContext);
}
Aggregations