use of org.apache.tika.metadata.Metadata in project tika by apache.
the class AutoDetectParserTest method testSpecificParserList.
/**
 * Regression test for TIKA-514: {@link AutoDetectParser} offers a constructor that
 * accepts an explicit detector together with an explicit list of parsers.
 * <p>
 * The parser is built from {@code MyDetector} and {@code MyParser}, run over a tiny
 * in-memory document, and the resulting metadata is expected to hold the entry
 * {@code "MyParser" -> "value"} (presumably written by {@code MyParser} itself).
 *
 * @see <a href="https://issues.apache.org/jira/browse/TIKA-514">TIKA-514</a>
 */
@Test
public void testSpecificParserList() throws Exception {
    AutoDetectParser explicitParser = new AutoDetectParser(new MyDetector(), new MyParser());
    byte[] payload = "test".getBytes(UTF_8);
    Metadata collected = new Metadata();

    explicitParser.parse(
            new ByteArrayInputStream(payload),
            new BodyContentHandler(),
            collected,
            new ParseContext());

    // The marker entry shows that the explicitly supplied parser handled the stream.
    String marker = collected.get("MyParser");
    assertEquals("value", marker);
}
use of org.apache.tika.metadata.Metadata in project tika by apache.
the class TestMimeTypes method assertMagic.
/**
 * Asserts that detection on the given leading bytes yields the expected media type.
 *
 * @param expected the expected string form of the detected {@link MediaType}
 * @param prefix   the bytes handed to the detector as the start of a document
 * @throws IOException if detection fails while reading the bytes
 */
private void assertMagic(String expected, byte[] prefix) throws IOException {
    ByteArrayInputStream leadingBytes = new ByteArrayInputStream(prefix);
    // Empty metadata: no name or content-type hints are supplied to the detector.
    MediaType detected = repo.detect(leadingBytes, new Metadata());
    assertNotNull(detected);
    String actual = detected.toString();
    assertEquals(expected, actual);
}
use of org.apache.tika.metadata.Metadata in project tika by apache.
the class MetadataAwareLuceneIndexer method indexWithDublinCore.
/**
 * Indexes the given file as a single Lucene document whose fields are the file's
 * metadata: a set of preset descriptive entries plus whatever Tika adds while parsing.
 * <p>
 * Every value of every metadata name becomes one stored, analyzed field named after
 * the metadata key.
 *
 * @param file the file to parse and index
 * @throws Exception if the file cannot be opened or parsed, or the document cannot be
 *                   written to the index
 */
public void indexWithDublinCore(File file) throws Exception {
    // Preset descriptive metadata; multi-valued properties use add(), single-valued use set().
    Metadata met = new Metadata();
    met.add(Metadata.CREATOR, "Manning");
    met.add(Metadata.CREATOR, "Tika in Action");
    met.set(Metadata.DATE, new Date());
    met.set(Metadata.FORMAT, tika.detect(file));
    met.set(DublinCore.SOURCE, file.toURI().toURL().toString());
    met.add(Metadata.SUBJECT, "File");
    met.add(Metadata.SUBJECT, "Indexing");
    met.add(Metadata.SUBJECT, "Metadata");
    met.set(Property.externalClosedChoise(Metadata.RIGHTS, "public", "private"), "public");

    try (InputStream is = new FileInputStream(file)) {
        // NOTE(review): the value returned by tika.parse is discarded here — confirm the
        // metadata is fully populated by the time this call returns.
        tika.parse(is, met);

        Document document = new Document();
        for (String key : met.names()) {
            for (String val : met.getValues(key)) {
                document.add(new Field(key, val, Store.YES, Index.ANALYZED));
            }
        }
        // Add the document once, after all fields are collected. Previously this call sat
        // inside the per-key loop, so the same document was indexed once per metadata name.
        writer.addDocument(document);
    }
}
use of org.apache.tika.metadata.Metadata in project tika by apache.
the class MyFirstTika method main.
/**
 * Parses the file named by the first command-line argument twice — once through
 * {@code parseUsingComponents} and once through {@code parseUsingAutoDetect} — and
 * prints the metadata and text from each pass to standard output, separated by a
 * dashed line.
 *
 * @param args command-line arguments; {@code args[0]} is the name of the file to parse
 * @throws Exception if either parse fails
 */
public static void main(String[] args) throws Exception {
    final String filename = args[0];
    final TikaConfig tikaConfig = TikaConfig.getDefaultConfig();

    // First pass: each pass gets a fresh Metadata so results do not mix.
    final Metadata componentMetadata = new Metadata();
    final String componentText = parseUsingComponents(filename, tikaConfig, componentMetadata);
    System.out.println("Parsed Metadata: ");
    System.out.println(componentMetadata);
    System.out.println("Parsed Text: ");
    System.out.println(componentText);

    System.out.println("-------------------------");

    // Second pass over the same file.
    final Metadata autoDetectMetadata = new Metadata();
    final String autoDetectText = parseUsingAutoDetect(filename, tikaConfig, autoDetectMetadata);
    System.out.println("Parsed Metadata: ");
    System.out.println(autoDetectMetadata);
    System.out.println("Parsed Text: ");
    System.out.println(autoDetectText);
}
use of org.apache.tika.metadata.Metadata in project tika by apache.
the class ParsingExample method parseEmbeddedExample.
/**
 * Extracts the content of the bundled {@code test_recursive_embedded.docx} resource,
 * covering the outer document and all embedded documents. The key step is registering
 * a {@link Parser} in the {@link ParseContext}; here the same
 * {@link AutoDetectParser} that parses the outer document is registered.
 *
 * @return content, including from embedded documents
 * @throws IOException   if the resource stream cannot be read or closed
 * @throws SAXException  if the content handler reports an error
 * @throws TikaException if parsing fails
 */
public String parseEmbeddedExample() throws IOException, SAXException, TikaException {
    AutoDetectParser autoParser = new AutoDetectParser();
    BodyContentHandler textCollector = new BodyContentHandler();
    Metadata docMetadata = new Metadata();
    ParseContext parseContext = new ParseContext();
    // Registering the parser in the context is what the example is about.
    parseContext.set(Parser.class, autoParser);

    String extracted;
    try (InputStream docx = ParsingExample.class.getResourceAsStream("test_recursive_embedded.docx")) {
        autoParser.parse(docx, textCollector, docMetadata, parseContext);
        extracted = textCollector.toString();
    }
    return extracted;
}
Aggregations