Search in sources :

Example 61 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class ExecutableParserTest method testWin32Parser.

@Test
public void testWin32Parser() throws Exception {
    // Run the sample executable through getXML and check what was extracted.
    XMLResult result = getXML("testWindows-x86-32.exe");

    // General document properties.
    assertEquals("application/x-msdownload", result.metadata.get(Metadata.CONTENT_TYPE));
    assertEquals("2012-05-13T13:40:11Z", result.metadata.get(Metadata.CREATION_DATE));

    // Executable-specific properties.
    assertEquals(ExecutableParser.MACHINE_x86_32, result.metadata.get(ExecutableParser.MACHINE_TYPE));
    assertEquals("Little", result.metadata.get(ExecutableParser.ENDIAN));
    assertEquals("32", result.metadata.get(ExecutableParser.ARCHITECTURE_BITS));
    assertEquals("Windows", result.metadata.get(ExecutableParser.PLATFORM));

    // No text is extracted yet, so the XML body is expected to be empty.
    assertContains("<body />", result.xml);
}
Also used : Metadata(org.apache.tika.metadata.Metadata) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 62 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class SourceCodeParserTest method testAuthor.

@Test
public void testAuthor() throws Exception {
    // Metadata created for the C++ source media type.
    Metadata cppMetadata = createMetadata("text/x-c++src");

    // Only the metadata populated during extraction is inspected; the returned text is ignored.
    getText(getResourceAsStream("/test-documents/testCPP.cpp"), sourceCodeParser, cppMetadata);

    assertEquals("Hong-Thai Nguyen", cppMetadata.get(TikaCoreProperties.CREATOR));
}
Also used : Metadata(org.apache.tika.metadata.Metadata) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 63 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class SourceCodeParserTest method testLoC.

@Test
public void testLoC() throws Exception {
    // Metadata created for the Groovy source media type.
    Metadata metadata = createMetadata("text/x-groovy");

    // Only the metadata populated during extraction is inspected; the returned text is ignored.
    getText(getResourceAsStream("/test-documents/testGROOVY.groovy"), sourceCodeParser, metadata);

    // Expected value goes first so that a failure reports "expected 9 but was ..." correctly.
    assertEquals("9", metadata.get("LoC"));
}
Also used : Metadata(org.apache.tika.metadata.Metadata) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 64 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class HtmlParserTest method testDetectOfCharset.

/**
 * Test case for TIKA-334: a non-ASCII title encoded as UTF-8 must come back
 * unchanged in the title metadata.
 *
 * @see <a href="https://issues.apache.org/jira/browse/TIKA-334">TIKA-334</a>
 */
@Test
public void testDetectOfCharset() throws Exception {
    String html = "<html><head><title>Ž</title></head><body></body></html>";
    Metadata parsedMetadata = new Metadata();

    // Feed the parser raw UTF-8 bytes with no charset declared in the markup.
    HtmlParser parser = new HtmlParser();
    ByteArrayInputStream utf8Input = new ByteArrayInputStream(html.getBytes(UTF_8));
    parser.parse(utf8Input, new BodyContentHandler(), parsedMetadata, new ParseContext());

    assertEquals("Ž", parsedMetadata.get(TikaCoreProperties.TITLE));
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) ByteArrayInputStream(java.io.ByteArrayInputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 65 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class HtmlParserTest method testCustomHtmlSchema.

// TIKA-1193
@Test
public void testCustomHtmlSchema() throws Exception {
    // A table nested inside an anchor, which the default schema does not allow.
    String html = "<html><body><a><table><tr><td>text</tr></tr></table></a></body></html>";
    Metadata metadata = new Metadata();

    // First pass: default schema, so no anchor text is expected.
    LinkContentHandler defaultSchemaLinks = new LinkContentHandler();
    new HtmlParser().parse(
            new ByteArrayInputStream(html.getBytes(ISO_8859_1)),
            defaultSchemaLinks,
            metadata,
            new ParseContext());
    assertEquals("", defaultSchemaLinks.getLinks().get(0).getText());

    // Second pass: supply a schema that allows tables inside anchors.
    Schema permissiveSchema = new HTMLSchema();
    permissiveSchema.elementType("a", HTMLSchema.M_ANY, 65535, 0);
    ParseContext customContext = new ParseContext();
    customContext.set(Schema.class, permissiveSchema);

    LinkContentHandler customSchemaLinks = new LinkContentHandler();
    new HtmlParser().parse(
            new ByteArrayInputStream(html.getBytes(ISO_8859_1)),
            customSchemaLinks,
            metadata,
            customContext);
    // With the custom schema the anchor text is expected to be captured.
    assertEquals("\ttext\n\n", customSchemaLinks.getLinks().get(0).getText());
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) LinkContentHandler(org.apache.tika.sax.LinkContentHandler) HTMLSchema(org.ccil.cowan.tagsoup.HTMLSchema) Schema(org.ccil.cowan.tagsoup.Schema) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Aggregations

Metadata (org.apache.tika.metadata.Metadata)651 Test (org.junit.Test)467 InputStream (java.io.InputStream)320 ParseContext (org.apache.tika.parser.ParseContext)283 BodyContentHandler (org.apache.tika.sax.BodyContentHandler)269 TikaTest (org.apache.tika.TikaTest)257 ContentHandler (org.xml.sax.ContentHandler)229 AutoDetectParser (org.apache.tika.parser.AutoDetectParser)154 ByteArrayInputStream (java.io.ByteArrayInputStream)143 Parser (org.apache.tika.parser.Parser)136 TikaInputStream (org.apache.tika.io.TikaInputStream)133 IOException (java.io.IOException)66 DefaultHandler (org.xml.sax.helpers.DefaultHandler)59 TikaException (org.apache.tika.exception.TikaException)48 ExcelParserTest (org.apache.tika.parser.microsoft.ExcelParserTest)36 WordParserTest (org.apache.tika.parser.microsoft.WordParserTest)36 StringWriter (java.io.StringWriter)33 Tika (org.apache.tika.Tika)29 MediaType (org.apache.tika.mime.MediaType)29 SAXException (org.xml.sax.SAXException)29