Search in sources :

Example 16 with ParseContext

use of org.apache.tika.parser.ParseContext in project tika by apache.

the class TestGDALParser method testParseBasicInfo.

/**
 * Parses a netCDF test document with {@link GDALParser} and checks the
 * driver, coordinate system, size and the four corner-coordinate metadata
 * entries against fixed expected values.
 *
 * <p>The test is guarded by {@code assumeTrue(canRun())}, and the detailed
 * assertions are additionally guarded by the presence of the "Files"
 * metadata entry.
 */
@Test
public void testParseBasicInfo() {
    assumeTrue(canRun());
    final String expectedDriver = "netCDF/Network Common Data Format";
    final String expectedUpperRight = "512.0,    0.0";
    final String expectedUpperLeft = "0.0,    0.0";
    final String expectedLowerLeft = "0.0,  512.0";
    final String expectedLowerRight = "512.0,  512.0";
    final String expectedCoordinateSystem = "`'";
    final String expectedSize = "512, 512";
    GDALParser parser = new GDALParser();
    Metadata met = new Metadata();
    BodyContentHandler handler = new BodyContentHandler();
    // try-with-resources so the classpath stream is closed even if parsing fails.
    try (InputStream stream = TestGDALParser.class.getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc")) {
        parser.parse(stream, handler, met, new ParseContext());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertNotNull(met);
    assertNotNull(met.get("Driver"));
    assertEquals(expectedDriver, met.get("Driver"));
    // Remaining checks only apply when the parser reported a "Files" entry.
    assumeTrue(met.get("Files") != null);
    assertNotNull(met.get("Coordinate System"));
    assertEquals(expectedCoordinateSystem, met.get("Coordinate System"));
    assertNotNull(met.get("Size"));
    assertEquals(expectedSize, met.get("Size"));
    assertNotNull(met.get("Upper Right"));
    assertEquals(expectedUpperRight, met.get("Upper Right"));
    assertNotNull(met.get("Upper Left"));
    assertEquals(expectedUpperLeft, met.get("Upper Left"));
    // Null-check the same key that is compared next (previously re-checked "Upper Right").
    assertNotNull(met.get("Lower Right"));
    assertEquals(expectedLowerRight, met.get("Lower Right"));
    assertNotNull(met.get("Lower Left"));
    assertEquals(expectedLowerLeft, met.get("Lower Left"));
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) IOException(java.io.IOException) TikaException(org.apache.tika.exception.TikaException) SAXException(org.xml.sax.SAXException) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 17 with ParseContext

use of org.apache.tika.parser.ParseContext in project tika by apache.

the class TestGDALParser method testParseFITS.

/**
 * Parses a FITS test document with {@link GDALParser} and checks a handful
 * of metadata entries (ALLG-MIN, ATODCORR, ATODFILE, CAL_VER, CALIBDEF)
 * against fixed expected values.
 *
 * <p>The test is guarded by {@code assumeTrue(canRun())} and by an
 * {@code ExternalParser.check} probe of {@code gdalinfo} on the same file.
 */
@Test
public void testParseFITS() {
    String fitsFilename = "/test-documents/WFPC2u5780205r_c0fx.fits";
    assumeTrue(canRun());
    // If the exit code is 1 (meaning FITS isn't supported by the installed
    // version of gdalinfo), don't run this test.
    String[] fitsCommand = { "gdalinfo", TestGDALParser.class.getResource(fitsFilename).getPath() };
    assumeTrue(ExternalParser.check(fitsCommand, 1));
    String expectedAllgMin = "-7.319537E1";
    String expectedAtodcorr = "COMPLETE";
    String expectedAtodfile = "uref$dbu1405iu.r1h";
    String expectedCalVersion = "                        ";
    String expectedCalibDef = "1466";
    GDALParser parser = new GDALParser();
    Metadata met = new Metadata();
    BodyContentHandler handler = new BodyContentHandler();
    // try-with-resources so the classpath stream is closed even if parsing fails.
    try (InputStream stream = TestGDALParser.class.getResourceAsStream(fitsFilename)) {
        parser.parse(stream, handler, met, new ParseContext());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertNotNull(met);
    assertNotNull(met.get("ALLG-MIN"));
    assertEquals(expectedAllgMin, met.get("ALLG-MIN"));
    assertNotNull(met.get("ATODCORR"));
    assertEquals(expectedAtodcorr, met.get("ATODCORR"));
    assertNotNull(met.get("ATODFILE"));
    assertEquals(expectedAtodfile, met.get("ATODFILE"));
    assertNotNull(met.get("CAL_VER"));
    assertEquals(expectedCalVersion, met.get("CAL_VER"));
    assertNotNull(met.get("CALIBDEF"));
    assertEquals(expectedCalibDef, met.get("CALIBDEF"));
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) IOException(java.io.IOException) TikaException(org.apache.tika.exception.TikaException) SAXException(org.xml.sax.SAXException) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 18 with ParseContext

use of org.apache.tika.parser.ParseContext in project tika by apache.

the class GeoParserTest method testNulls.

/**
 * Feeds an empty document to the geo parser and expects that none of the
 * Geographic_* metadata entries are populated.
 */
@Test
public void testNulls() throws UnsupportedEncodingException, IOException, SAXException, TikaException {
    final Metadata extracted = new Metadata();
    final ParseContext parseContext = new ParseContext();
    parseContext.set(GeoParserConfig.class, new GeoParserConfig());

    final byte[] emptyPayload = "".getBytes(UTF_8);
    geoparser.parse(new ByteArrayInputStream(emptyPayload), new BodyContentHandler(), extracted, parseContext);

    // Checked in the same order as before: name, longitude, latitude.
    final String[] geoKeys = { "Geographic_NAME", "Geographic_LONGITUDE", "Geographic_LATITUDE" };
    for (String geoKey : geoKeys) {
        assertNull(extracted.get(geoKey));
    }
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) ByteArrayInputStream(java.io.ByteArrayInputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) Test(org.junit.Test)

Example 19 with ParseContext

use of org.apache.tika.parser.ParseContext in project tika by apache.

the class HDFParserTest method testHDF4.

/**
 * Parses the bundled HDF4 sample with {@link HDFParser} and checks three
 * metadata entries against fixed expected values. Returns early, without
 * asserting anything, when the {@code java.version} property starts with "1.5".
 */
@Test
public void testHDF4() throws Exception {
    final String javaVersion = System.getProperty("java.version");
    if (javaVersion.startsWith("1.5")) {
        return;
    }

    final Parser hdfParser = new HDFParser();
    final ContentHandler bodyHandler = new BodyContentHandler();
    final Metadata extracted = new Metadata();

    // The sample is a publicly available HDF4 file from the HDF4 examples:
    // http://www.hdfgroup.org/training/hdf4_chunking/Chunkit/bin/input54kmdata.hdf
    try (InputStream hdfStream = HDFParser.class.getResourceAsStream("/test-documents/test.hdf")) {
        hdfParser.parse(hdfStream, bodyHandler, extracted, new ParseContext());
    }

    assertNotNull(extracted);

    final String history = extracted.get("_History");
    assertEquals("Direct read of HDF4 file through CDM library", history);

    final String pass = extracted.get("Pass");
    assertEquals("Ascending", pass);

    final String fileType = extracted.get("File-Type-Description");
    assertEquals("Hierarchical Data Format, version 4", fileType);
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) HDFParser(org.apache.tika.parser.hdf.HDFParser) BodyContentHandler(org.apache.tika.sax.BodyContentHandler) ContentHandler(org.xml.sax.ContentHandler) Parser(org.apache.tika.parser.Parser) HDFParser(org.apache.tika.parser.hdf.HDFParser) Test(org.junit.Test)

Example 20 with ParseContext

use of org.apache.tika.parser.ParseContext in project tika by apache.

the class GribParserTest method testParseGlobalMetadata.

/**
 * Parses the bundled GRIB2 sample with {@link GribParser} and checks that
 * the handler's text output contains both a "dimensions:" and a
 * "variables:" section marker.
 */
@Test
public void testParseGlobalMetadata() throws Exception {
    final Parser gribParser = new GribParser();
    final Metadata extracted = new Metadata();
    final ContentHandler bodyHandler = new BodyContentHandler();

    try (InputStream gribStream = GribParser.class.getResourceAsStream("/test-documents/gdas1.forecmwf.2014062612.grib2")) {
        gribParser.parse(gribStream, bodyHandler, extracted, new ParseContext());
    }

    assertNotNull(extracted);

    final String bodyText = bodyHandler.toString();
    final boolean hasDimensions = bodyText.contains("dimensions:");
    assertTrue(hasDimensions);
    final boolean hasVariables = bodyText.contains("variables:");
    assertTrue(hasVariables);
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) BodyContentHandler(org.apache.tika.sax.BodyContentHandler) ContentHandler(org.xml.sax.ContentHandler) Parser(org.apache.tika.parser.Parser) Test(org.junit.Test)

Aggregations

ParseContext (org.apache.tika.parser.ParseContext)336 Metadata (org.apache.tika.metadata.Metadata)281 Test (org.junit.Test)260 InputStream (java.io.InputStream)195 BodyContentHandler (org.apache.tika.sax.BodyContentHandler)195 TikaTest (org.apache.tika.TikaTest)186 ContentHandler (org.xml.sax.ContentHandler)163 AutoDetectParser (org.apache.tika.parser.AutoDetectParser)117 Parser (org.apache.tika.parser.Parser)107 ByteArrayInputStream (java.io.ByteArrayInputStream)91 TikaInputStream (org.apache.tika.io.TikaInputStream)77 DefaultHandler (org.xml.sax.helpers.DefaultHandler)52 ExcelParserTest (org.apache.tika.parser.microsoft.ExcelParserTest)31 WordParserTest (org.apache.tika.parser.microsoft.WordParserTest)31 TikaException (org.apache.tika.exception.TikaException)29 StringWriter (java.io.StringWriter)26 IOException (java.io.IOException)24 SAXException (org.xml.sax.SAXException)24 CompositeParser (org.apache.tika.parser.CompositeParser)22 FileInputStream (java.io.FileInputStream)19