use of org.apache.tika.metadata.Metadata in project tika by apache.
the class ExecutableParserTest method testWin32Parser.
/**
 * Checks the metadata and XML that {@code getXML} returns for the
 * {@code testWindows-x86-32.exe} sample.
 */
@Test
public void testWin32Parser() throws Exception {
    final XMLResult result = getXML("testWindows-x86-32.exe");
    final Metadata parsed = result.metadata;

    // Generic document properties
    assertEquals("application/x-msdownload", parsed.get(Metadata.CONTENT_TYPE));
    assertEquals("2012-05-13T13:40:11Z", parsed.get(Metadata.CREATION_DATE));

    // Properties reported under the ExecutableParser keys
    assertEquals(ExecutableParser.MACHINE_x86_32, parsed.get(ExecutableParser.MACHINE_TYPE));
    assertEquals("Little", parsed.get(ExecutableParser.ENDIAN));
    assertEquals("32", parsed.get(ExecutableParser.ARCHITECTURE_BITS));
    assertEquals("Windows", parsed.get(ExecutableParser.PLATFORM));

    // No text is produced yet, so the XML is expected to contain an empty body element
    assertContains("<body />", result.xml);
}
use of org.apache.tika.metadata.Metadata in project tika by apache.
the class SourceCodeParserTest method testAuthor.
/**
 * Runs the C++ sample through {@code sourceCodeParser} and checks the
 * {@link TikaCoreProperties#CREATOR} value left in the metadata.
 */
@Test
public void testAuthor() throws Exception {
    final String expectedCreator = "Hong-Thai Nguyen";
    final Metadata cppMetadata = createMetadata("text/x-c++src");

    // The returned text is not needed here; only the populated metadata is inspected.
    getText(getResourceAsStream("/test-documents/testCPP.cpp"), sourceCodeParser, cppMetadata);

    assertEquals(expectedCreator, cppMetadata.get(TikaCoreProperties.CREATOR));
}
use of org.apache.tika.metadata.Metadata in project tika by apache.
the class SourceCodeParserTest method testLoC.
/**
 * Runs the Groovy sample through {@code sourceCodeParser} and checks the
 * {@code "LoC"} metadata entry.
 */
@Test
public void testLoC() throws Exception {
    Metadata metadata = createMetadata("text/x-groovy");
    getText(getResourceAsStream("/test-documents/testGROOVY.groovy"), sourceCodeParser, metadata);
    // assertEquals takes (expected, actual); keeping that order makes a failure
    // report "expected 9 but was ..." instead of the reverse.
    assertEquals("9", metadata.get("LoC"));
}
use of org.apache.tika.metadata.Metadata in project tika by apache.
the class HtmlParserTest method testDetectOfCharset.
/**
 * Test case for TIKA-334: feeds UTF-8 encoded HTML whose title is a single
 * non-ASCII character to {@code HtmlParser} and checks that the same
 * character is reported as the title.
 *
 * @see <a href="https://issues.apache.org/jira/browse/TIKA-334">TIKA-334</a>
 */
@Test
public void testDetectOfCharset() throws Exception {
    final String html = "<html><head><title>Ž</title></head><body></body></html>";
    final byte[] encodedHtml = html.getBytes(UTF_8);
    final Metadata parsedMetadata = new Metadata();

    final ByteArrayInputStream input = new ByteArrayInputStream(encodedHtml);
    final HtmlParser parser = new HtmlParser();
    parser.parse(input, new BodyContentHandler(), parsedMetadata, new ParseContext());

    assertEquals("Ž", parsedMetadata.get(TikaCoreProperties.TITLE));
}
use of org.apache.tika.metadata.Metadata in project tika by apache.
the class HtmlParserTest method testCustomHtmlSchema.
// TIKA-1193
/**
 * Parses the same HTML twice, first with a default {@code ParseContext} and then
 * with a customised {@code Schema} registered in the context, and compares the
 * anchor text collected by a {@code LinkContentHandler} in each case.
 */
@Test
public void testCustomHtmlSchema() throws Exception {
    // Default schema does not allow tables inside anchors
    final String html = "<html><body><a><table><tr><td>text</tr></tr></table></a></body></html>";
    final byte[] htmlBytes = html.getBytes(ISO_8859_1);
    final Metadata metadata = new Metadata();

    // First pass: default parse context, so no anchor text is expected
    final LinkContentHandler defaultSchemaLinks = new LinkContentHandler();
    new HtmlParser().parse(
            new ByteArrayInputStream(htmlBytes), defaultSchemaLinks, metadata, new ParseContext());
    assertEquals("", defaultSchemaLinks.getLinks().get(0).getText());

    // Second pass: change the schema to allow tables inside anchors
    final Schema customSchema = new HTMLSchema();
    customSchema.elementType("a", HTMLSchema.M_ANY, 65535, 0);
    final ParseContext customContext = new ParseContext();
    customContext.set(Schema.class, customSchema);

    final LinkContentHandler customSchemaLinks = new LinkContentHandler();
    new HtmlParser().parse(
            new ByteArrayInputStream(htmlBytes), customSchemaLinks, metadata, customContext);

    // With the custom schema the anchor text is expected
    assertEquals("\ttext\n\n", customSchemaLinks.getLinks().get(0).getText());
}
Aggregations