Use of org.apache.tika.metadata.Metadata in project tika by Apache.
Class RollbackSoftware, method rollback.
/**
 * Rolls back to the second-to-last installed version found in a deployment area.
 *
 * <p>The absolute path of {@code deployArea} is fed (as a UTF-8 stream) to a
 * {@code DeploymentAreaParser}; the links gathered by the {@code LinkContentHandler}
 * are sorted by their text, and the text of the link just before the last one is
 * passed to {@code updateVersion}.
 *
 * @param deployArea the deployment area whose absolute path is parsed
 * @throws IOException if fewer than two links are found, or if raised while parsing
 * @throws SAXException if raised while parsing
 * @throws TikaException if raised while parsing
 */
public void rollback(File deployArea) throws IOException, SAXException, TikaException {
    LinkContentHandler linkCollector = new LinkContentHandler();
    Metadata metadata = new Metadata();
    DeploymentAreaParser areaParser = new DeploymentAreaParser();

    // The parser receives the path itself as its input, not the contents of a file.
    String areaPath = deployArea.getAbsolutePath();
    areaParser.parse(IOUtils.toInputStream(areaPath, UTF_8), linkCollector, metadata);

    List<Link> versions = linkCollector.getLinks();
    if (versions.size() < 2) {
        throw new IOException("Must have installed at least 2 versions!");
    }

    // NOTE(review): ordering relies on compareTo of the link text; if that is a plain
    // String comparison, "10" would sort before "9" -- confirm the version naming scheme.
    Comparator<Link> byLinkText = new Comparator<Link>() {
        @Override
        public int compare(Link left, Link right) {
            return left.getText().compareTo(right.getText());
        }
    };
    Collections.sort(versions, byLinkText);

    // After sorting, the last entry is the highest; roll back to the one before it.
    int previousIndex = versions.size() - 2;
    this.updateVersion(versions.get(previousIndex).getText());
}
Use of org.apache.tika.metadata.Metadata in project tika by Apache.
Class SimpleComparerTest, method testGetContent.
/**
 * Exercises {@code getContent} with a ten-character content value at two length
 * limits, with a Metadata object holding no content, and with a null Metadata,
 * checking the returned length and the CONTENT_TRUNCATED_AT_MAX_LEN entry each time.
 */
@Test
public void testGetContent() throws Exception {
    Metadata metadata = new Metadata();
    metadata.add(RecursiveParserWrapper.TIKA_CONTENT, "0123456789");
    // The same map is reused across calls; each call is expected to refresh the flag.
    Map<Cols, String> row = new HashMap<>();

    // limit equal to the content length: full content, not truncated
    assertEquals(10, getContent(metadata, 10, row).length());
    assertEquals("FALSE", row.get(Cols.CONTENT_TRUNCATED_AT_MAX_LEN));

    // limit shorter than the content: truncated to the limit
    assertEquals(4, getContent(metadata, 4, row).length());
    assertEquals("TRUE", row.get(Cols.CONTENT_TRUNCATED_AT_MAX_LEN));

    // Metadata with no content: empty result, not truncated
    assertEquals(0, getContent(new Metadata(), 10, row).length());
    assertEquals("FALSE", row.get(Cols.CONTENT_TRUNCATED_AT_MAX_LEN));

    // null Metadata: empty result, not truncated
    assertEquals(0, getContent(null, 10, row).length());
    assertEquals("FALSE", row.get(Cols.CONTENT_TRUNCATED_AT_MAX_LEN));
}
Use of org.apache.tika.metadata.Metadata in project tika by Apache.
Class SimpleComparerTest, method testAttachmentCounts.
/**
 * Builds six Metadata objects that each carry an embedded resource path and checks
 * the per-entry counts returned by {@code AbstractProfiler.countAttachments}.
 */
@Test
public void testAttachmentCounts() {
    // The first path is deliberately bad data: in the first metadata object it
    // should be ignored.
    String[] embeddedPaths = {
        "dir1/dir2/file.zip",
        "/f1.docx/f2.zip/text1.txt",
        "/f1.docx/f2.zip/text2.txt",
        "/f1.docx/f2.zip",
        "/f1.docx",
        "/f1.docx/text3.txt"
    };
    List<Metadata> metadataList = new ArrayList<>();
    for (String embeddedPath : embeddedPaths) {
        Metadata entry = new Metadata();
        entry.set(RecursiveParserWrapper.EMBEDDED_RESOURCE_PATH, embeddedPath);
        metadataList.add(entry);
    }

    List<Integer> actualCounts = AbstractProfiler.countAttachments(metadataList);

    // One expected count per metadata entry, in the same order as embeddedPaths.
    int[] expectedValues = {5, 0, 0, 2, 4, 0};
    List<Integer> expectedCounts = new ArrayList<>();
    for (int value : expectedValues) {
        expectedCounts.add(value);
    }
    assertEquals(expectedCounts, actualCounts);
}
Use of org.apache.tika.metadata.Metadata in project tika by Apache.
Class ExtractEmbeddedFiles, method extract.
/**
 * Parses the given stream with this object's {@code parser}, using a
 * {@code MyEmbeddedDocumentExtractor} built for {@code outputDir} as the
 * embedded-document extractor registered in the parse context.
 *
 * @param is the stream to parse; it is not closed here
 * @param outputDir the directory handed to the embedded-document extractor
 * @throws SAXException if raised while parsing
 * @throws TikaException if raised while parsing
 * @throws IOException if raised while parsing
 */
public void extract(InputStream is, Path outputDir) throws SAXException, TikaException, IOException {
    ParseContext context = new ParseContext();
    // Register the parser before building the extractor, which is given this context.
    context.set(Parser.class, parser);
    EmbeddedDocumentExtractor embeddedExtractor = new MyEmbeddedDocumentExtractor(outputDir, context);
    context.set(EmbeddedDocumentExtractor.class, embeddedExtractor);

    // NOTE(review): -1 presumably disables the handler's write limit -- confirm
    // against the BodyContentHandler documentation.
    ContentHandler bodyHandler = new BodyContentHandler(-1);
    Metadata metadata = new Metadata();
    parser.parse(is, bodyHandler, metadata, context);
}
Use of org.apache.tika.metadata.Metadata in project tika by Apache.
Class Language, method languageDetectionWithHandler.
/**
 * Parses standard input with an {@code AutoDetectParser}, routing the content
 * through a {@code LanguageHandler}, then prints the language reported by the
 * handler's result to standard output.
 *
 * @throws Exception if parsing fails
 */
public static void languageDetectionWithHandler() throws Exception {
    LanguageHandler languageHandler = new LanguageHandler();
    AutoDetectParser autoParser = new AutoDetectParser();
    autoParser.parse(System.in, languageHandler, new Metadata(), new ParseContext());
    // The handler is queried only after the whole input has been parsed.
    System.out.println(languageHandler.getLanguage().getLanguage());
}
Aggregations