Search in sources :

Example 51 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class TestGDALParser method testParseBasicInfo.

/**
 * Parses the bundled netCDF sample with {@link GDALParser} and verifies the metadata it
 * produces: the driver name, and then the coordinate system, size and the four corner
 * coordinates.
 *
 * <p>The test is skipped through JUnit assumptions when {@code canRun()} returns false.
 * The checks after the driver assertion are also skipped when no "Files" metadata value
 * is present.
 */
@Test
public void testParseBasicInfo() {
    assumeTrue(canRun());
    final String expectedDriver = "netCDF/Network Common Data Format";
    final String expectedUpperRight = "512.0,    0.0";
    final String expectedUpperLeft = "0.0,    0.0";
    final String expectedLowerLeft = "0.0,  512.0";
    final String expectedLowerRight = "512.0,  512.0";
    final String expectedCoordinateSystem = "`'";
    final String expectedSize = "512, 512";
    GDALParser parser = new GDALParser();
    Metadata met = new Metadata();
    BodyContentHandler handler = new BodyContentHandler();
    // try-with-resources releases the test document stream even when parsing throws.
    try (InputStream stream = TestGDALParser.class.getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc")) {
        // getResourceAsStream returns null for a missing resource; fail with a clear message.
        assertNotNull("test document not found on the classpath", stream);
        parser.parse(stream, handler, met, new ParseContext());
    } catch (Exception e) {
        // Keep the original exception as the cause so the failure report shows where it came from.
        throw new AssertionError("Parsing the netCDF test document failed: " + e, e);
    }
    assertNotNull(met);
    assertNotNull(met.get("Driver"));
    assertEquals(expectedDriver, met.get("Driver"));
    assumeTrue(met.get("Files") != null);
    assertNotNull(met.get("Coordinate System"));
    assertEquals(expectedCoordinateSystem, met.get("Coordinate System"));
    assertNotNull(met.get("Size"));
    assertEquals(expectedSize, met.get("Size"));
    assertNotNull(met.get("Upper Right"));
    assertEquals(expectedUpperRight, met.get("Upper Right"));
    assertNotNull(met.get("Upper Left"));
    assertEquals(expectedUpperLeft, met.get("Upper Left"));
    // Each null check now targets the same key as the equality check that follows it.
    assertNotNull(met.get("Lower Right"));
    assertEquals(expectedLowerRight, met.get("Lower Right"));
    assertNotNull(met.get("Lower Left"));
    assertEquals(expectedLowerLeft, met.get("Lower Left"));
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) IOException(java.io.IOException) TikaException(org.apache.tika.exception.TikaException) SAXException(org.xml.sax.SAXException) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 52 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class TestGDALParser method testParseFITS.

/**
 * Parses the bundled FITS sample with {@link GDALParser} and verifies five of the
 * metadata values it produces (ALLG-MIN, ATODCORR, ATODFILE, CAL_VER, CALIBDEF).
 *
 * <p>The test is skipped through JUnit assumptions when {@code canRun()} returns false or
 * when {@code ExternalParser.check} rejects the {@code gdalinfo} command for this file.
 */
@Test
public void testParseFITS() {
    String fitsFilename = "/test-documents/WFPC2u5780205r_c0fx.fits";
    assumeTrue(canRun());
    // If the exit code is 1 (meaning FITS isn't supported by the installed version of gdalinfo), don't run this test.
    String[] fitsCommand = { "gdalinfo", TestGDALParser.class.getResource(fitsFilename).getPath() };
    assumeTrue(ExternalParser.check(fitsCommand, 1));
    String expectedAllgMin = "-7.319537E1";
    String expectedAtodcorr = "COMPLETE";
    String expectedAtodfile = "uref$dbu1405iu.r1h";
    String expectedCalVersion = "                        ";
    String expectedCalibDef = "1466";
    GDALParser parser = new GDALParser();
    Metadata met = new Metadata();
    BodyContentHandler handler = new BodyContentHandler();
    // try-with-resources releases the test document stream even when parsing throws.
    try (InputStream stream = TestGDALParser.class.getResourceAsStream(fitsFilename)) {
        parser.parse(stream, handler, met, new ParseContext());
    } catch (Exception e) {
        // Keep the original exception as the cause so the failure report shows where it came from.
        throw new AssertionError("Parsing the FITS test document failed: " + e, e);
    }
    assertNotNull(met);
    assertNotNull(met.get("ALLG-MIN"));
    assertEquals(expectedAllgMin, met.get("ALLG-MIN"));
    assertNotNull(met.get("ATODCORR"));
    assertEquals(expectedAtodcorr, met.get("ATODCORR"));
    assertNotNull(met.get("ATODFILE"));
    assertEquals(expectedAtodfile, met.get("ATODFILE"));
    assertNotNull(met.get("CAL_VER"));
    assertEquals(expectedCalVersion, met.get("CAL_VER"));
    assertNotNull(met.get("CALIBDEF"));
    assertEquals(expectedCalibDef, met.get("CALIBDEF"));
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) IOException(java.io.IOException) TikaException(org.apache.tika.exception.TikaException) SAXException(org.xml.sax.SAXException) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 53 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class GeoParserTest method testNulls.

/**
 * Feeds an empty document to the geo parser and verifies that the Geographic_NAME,
 * Geographic_LONGITUDE and Geographic_LATITUDE metadata values are all left null.
 */
@Test
public void testNulls() throws UnsupportedEncodingException, IOException, SAXException, TikaException {
    final Metadata extracted = new Metadata();
    final ParseContext parseContext = new ParseContext();
    parseContext.set(GeoParserConfig.class, new GeoParserConfig());

    final byte[] emptyDocument = "".getBytes(UTF_8);
    geoparser.parse(new ByteArrayInputStream(emptyDocument), new BodyContentHandler(), extracted, parseContext);

    // None of the geographic fields may be set for empty input.
    for (String field : new String[] { "Geographic_NAME", "Geographic_LONGITUDE", "Geographic_LATITUDE" }) {
        assertNull(extracted.get(field));
    }
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) ByteArrayInputStream(java.io.ByteArrayInputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) Test(org.junit.Test)

Example 54 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class HDFParserTest method testHDF4.

/**
 * Parses the bundled HDF4 sample with {@link HDFParser} and verifies the "_History",
 * "Pass" and "File-Type-Description" metadata values.
 *
 * @throws Exception if the sample cannot be read or parsed
 */
@Test
public void testHDF4() throws Exception {
    // The former Java 1.5 early-return was removed: this method uses try-with-resources,
    // which is Java 7 syntax, so that branch could never be taken.
    Parser parser = new HDFParser();
    ContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();
    /*
       * this is a publicly available HDF4 file from the HDF4 examples:
       * 
       * http://www.hdfgroup.org/training/hdf4_chunking/Chunkit/bin/input54kmdata.hdf
       */
    try (InputStream stream = HDFParser.class.getResourceAsStream("/test-documents/test.hdf")) {
        // getResourceAsStream returns null for a missing resource; fail with a clear message.
        assertNotNull("test document not found on the classpath", stream);
        parser.parse(stream, handler, metadata, new ParseContext());
    }
    assertNotNull(metadata);
    assertEquals("Direct read of HDF4 file through CDM library", metadata.get("_History"));
    assertEquals("Ascending", metadata.get("Pass"));
    assertEquals("Hierarchical Data Format, version 4", metadata.get("File-Type-Description"));
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) HDFParser(org.apache.tika.parser.hdf.HDFParser) ContentHandler(org.xml.sax.ContentHandler) Parser(org.apache.tika.parser.Parser) Test(org.junit.Test)

Example 55 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class GribParserTest method testParseGlobalMetadata.

@Test
public void testParseGlobalMetadata() throws Exception {
    Parser parser = new GribParser();
    Metadata metadata = new Metadata();
    ContentHandler handler = new BodyContentHandler();
    try (InputStream stream = GribParser.class.getResourceAsStream("/test-documents/gdas1.forecmwf.2014062612.grib2")) {
        parser.parse(stream, handler, metadata, new ParseContext());
    }
    assertNotNull(metadata);
    String content = handler.toString();
    assertTrue(content.contains("dimensions:"));
    assertTrue(content.contains("variables:"));
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) ContentHandler(org.xml.sax.ContentHandler) Parser(org.apache.tika.parser.Parser) Test(org.junit.Test)

Aggregations

Metadata (org.apache.tika.metadata.Metadata): 651, Test (org.junit.Test): 467, InputStream (java.io.InputStream): 320, ParseContext (org.apache.tika.parser.ParseContext): 283, BodyContentHandler (org.apache.tika.sax.BodyContentHandler): 269, TikaTest (org.apache.tika.TikaTest): 257, ContentHandler (org.xml.sax.ContentHandler): 229, AutoDetectParser (org.apache.tika.parser.AutoDetectParser): 154, ByteArrayInputStream (java.io.ByteArrayInputStream): 143, Parser (org.apache.tika.parser.Parser): 136, TikaInputStream (org.apache.tika.io.TikaInputStream): 133, IOException (java.io.IOException): 66, DefaultHandler (org.xml.sax.helpers.DefaultHandler): 59, TikaException (org.apache.tika.exception.TikaException): 48, ExcelParserTest (org.apache.tika.parser.microsoft.ExcelParserTest): 36, WordParserTest (org.apache.tika.parser.microsoft.WordParserTest): 36, StringWriter (java.io.StringWriter): 33, Tika (org.apache.tika.Tika): 29, MediaType (org.apache.tika.mime.MediaType): 29, SAXException (org.xml.sax.SAXException): 29