Search in sources :

Example 11 with BodyContentHandler

use of org.apache.tika.sax.BodyContentHandler in project tika by apache.

the class TestGDALParser method testParseMetadata.

/**
 * Checks that {@link GDALParser} reports the NetCDF global attributes and the
 * eighth sub-dataset entry of the sample climate file as metadata values.
 *
 * <p>The test only proceeds past {@code assumeTrue} when {@code canRun()} holds.
 */
@Test
public void testParseMetadata() {
    assumeTrue(canRun());
    final String expectedNcInst = "NCAR (National Center for Atmospheric Research, Boulder, CO, USA)";
    final String expectedModelNameEnglish = "NCAR CCSM";
    final String expectedProgramId = "Source file unknown Version unknown Date unknown";
    final String expectedProjectId = "IPCC Fourth Assessment";
    final String expectedRealization = "1";
    final String expectedTitle = "model output prepared for IPCC AR4";
    final String expectedSub8Name = "\":ua";
    final String expectedSub8Desc = "[1x17x128x256] eastward_wind (32-bit floating-point)";
    GDALParser parser = new GDALParser();
    Metadata met = new Metadata();
    BodyContentHandler handler = new BodyContentHandler();
    // try-with-resources closes the test document even when parsing or an assertion fails.
    try (InputStream stream = TestGDALParser.class.getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc")) {
        parser.parse(stream, handler, met, new ParseContext());
        assertNotNull(met);
        assertNotNull(met.get("NC_GLOBAL#institution"));
        assertEquals(expectedNcInst, met.get("NC_GLOBAL#institution"));
        assertNotNull(met.get("NC_GLOBAL#model_name_english"));
        assertEquals(expectedModelNameEnglish, met.get("NC_GLOBAL#model_name_english"));
        assertNotNull(met.get("NC_GLOBAL#prg_ID"));
        assertEquals(expectedProgramId, met.get("NC_GLOBAL#prg_ID"));
        assertNotNull(met.get("NC_GLOBAL#project_id"));
        assertEquals(expectedProjectId, met.get("NC_GLOBAL#project_id"));
        assertNotNull(met.get("NC_GLOBAL#realization"));
        assertEquals(expectedRealization, met.get("NC_GLOBAL#realization"));
        assertNotNull(met.get("NC_GLOBAL#title"));
        assertEquals(expectedTitle, met.get("NC_GLOBAL#title"));
        assertNotNull(met.get("SUBDATASET_8_NAME"));
        // Only the suffix is compared; the leading part of the value is not pinned by this test.
        assertTrue(met.get("SUBDATASET_8_NAME").endsWith(expectedSub8Name));
        assertNotNull(met.get("SUBDATASET_8_DESC"));
        assertEquals(expectedSub8Desc, met.get("SUBDATASET_8_DESC"));
    } catch (Exception e) {
        // Any parser or I/O exception is reported as a test failure.
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) IOException(java.io.IOException) TikaException(org.apache.tika.exception.TikaException) SAXException(org.xml.sax.SAXException) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 12 with BodyContentHandler

use of org.apache.tika.sax.BodyContentHandler in project tika by apache.

the class TestGDALParser method testParseBasicInfo.

/**
 * Checks the basic fields {@link GDALParser} extracts from the sample NetCDF
 * file: driver, coordinate system, size and the four corner coordinates.
 *
 * <p>The test only proceeds past the first {@code assumeTrue} when
 * {@code canRun()} holds, and past the second one when the parser produced a
 * {@code "Files"} entry.
 */
@Test
public void testParseBasicInfo() {
    assumeTrue(canRun());
    final String expectedDriver = "netCDF/Network Common Data Format";
    final String expectedUpperRight = "512.0,    0.0";
    final String expectedUpperLeft = "0.0,    0.0";
    final String expectedLowerLeft = "0.0,  512.0";
    final String expectedLowerRight = "512.0,  512.0";
    final String expectedCoordinateSystem = "`'";
    final String expectedSize = "512, 512";
    GDALParser parser = new GDALParser();
    Metadata met = new Metadata();
    BodyContentHandler handler = new BodyContentHandler();
    // try-with-resources closes the test document whether or not parsing succeeds.
    try (InputStream stream = TestGDALParser.class.getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc")) {
        parser.parse(stream, handler, met, new ParseContext());
    } catch (Exception e) {
        // Any parser or I/O exception is reported as a test failure.
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertNotNull(met);
    assertNotNull(met.get("Driver"));
    assertEquals(expectedDriver, met.get("Driver"));
    // The remaining assertions are skipped when no "Files" entry was produced.
    assumeTrue(met.get("Files") != null);
    assertNotNull(met.get("Coordinate System"));
    assertEquals(expectedCoordinateSystem, met.get("Coordinate System"));
    assertNotNull(met.get("Size"));
    assertEquals(expectedSize, met.get("Size"));
    assertNotNull(met.get("Upper Right"));
    assertEquals(expectedUpperRight, met.get("Upper Right"));
    assertNotNull(met.get("Upper Left"));
    assertEquals(expectedUpperLeft, met.get("Upper Left"));
    // Each null check targets the same key as the equality check that follows it.
    assertNotNull(met.get("Lower Right"));
    assertEquals(expectedLowerRight, met.get("Lower Right"));
    assertNotNull(met.get("Lower Left"));
    assertEquals(expectedLowerLeft, met.get("Lower Left"));
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) IOException(java.io.IOException) TikaException(org.apache.tika.exception.TikaException) SAXException(org.xml.sax.SAXException) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 13 with BodyContentHandler

use of org.apache.tika.sax.BodyContentHandler in project tika by apache.

the class TestGDALParser method testParseFITS.

/**
 * Checks a handful of FITS header values that {@link GDALParser} extracts from
 * the sample FITS image.
 *
 * <p>The test only proceeds when {@code canRun()} holds and when the
 * {@code ExternalParser.check} probe of {@code gdalinfo} on the sample file
 * succeeds.
 */
@Test
public void testParseFITS() {
    String fitsFilename = "/test-documents/WFPC2u5780205r_c0fx.fits";
    assumeTrue(canRun());
    // If the exit code is 1 (meaning FITS isn't supported by the installed version of gdalinfo), don't run this test.
    String[] fitsCommand = { "gdalinfo", TestGDALParser.class.getResource(fitsFilename).getPath() };
    assumeTrue(ExternalParser.check(fitsCommand, 1));
    String expectedAllgMin = "-7.319537E1";
    String expectedAtodcorr = "COMPLETE";
    String expectedAtodfile = "uref$dbu1405iu.r1h";
    // The expected CAL_VER value is 24 blanks; the whitespace is significant to assertEquals.
    String expectedCalVersion = "                        ";
    String expectedCalibDef = "1466";
    GDALParser parser = new GDALParser();
    Metadata met = new Metadata();
    BodyContentHandler handler = new BodyContentHandler();
    // try-with-resources closes the test document even when parsing or an assertion fails.
    try (InputStream stream = TestGDALParser.class.getResourceAsStream(fitsFilename)) {
        parser.parse(stream, handler, met, new ParseContext());
        assertNotNull(met);
        assertNotNull(met.get("ALLG-MIN"));
        assertEquals(expectedAllgMin, met.get("ALLG-MIN"));
        assertNotNull(met.get("ATODCORR"));
        assertEquals(expectedAtodcorr, met.get("ATODCORR"));
        assertNotNull(met.get("ATODFILE"));
        assertEquals(expectedAtodfile, met.get("ATODFILE"));
        assertNotNull(met.get("CAL_VER"));
        assertEquals(expectedCalVersion, met.get("CAL_VER"));
        assertNotNull(met.get("CALIBDEF"));
        assertEquals(expectedCalibDef, met.get("CALIBDEF"));
    } catch (Exception e) {
        // Any parser or I/O exception is reported as a test failure.
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) IOException(java.io.IOException) TikaException(org.apache.tika.exception.TikaException) SAXException(org.xml.sax.SAXException) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 14 with BodyContentHandler

use of org.apache.tika.sax.BodyContentHandler in project tika by apache.

the class GeoParserTest method testNulls.

/**
 * Parses an empty document with the geo parser and expects none of the
 * geographic metadata fields to be set afterwards.
 */
@Test
public void testNulls() throws UnsupportedEncodingException, IOException, SAXException, TikaException {
    // A default configuration is registered on the context handed to the parser.
    ParseContext parseContext = new ParseContext();
    parseContext.set(GeoParserConfig.class, new GeoParserConfig());

    Metadata parsed = new Metadata();
    ByteArrayInputStream emptyInput = new ByteArrayInputStream("".getBytes(UTF_8));
    geoparser.parse(emptyInput, new BodyContentHandler(), parsed, parseContext);

    String[] geoKeys = { "Geographic_NAME", "Geographic_LONGITUDE", "Geographic_LATITUDE" };
    for (String geoKey : geoKeys) {
        assertNull(parsed.get(geoKey));
    }
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) ByteArrayInputStream(java.io.ByteArrayInputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) Test(org.junit.Test)

Example 15 with BodyContentHandler

use of org.apache.tika.sax.BodyContentHandler in project tika by apache.

the class HDFParserTest method testHDF4.

/**
 * Parses the bundled HDF4 sample with {@link HDFParser} and checks three of
 * the metadata values it reports.
 */
@Test
public void testHDF4() throws Exception {
    // Nothing is verified when the runtime reports a 1.5 Java version.
    final String javaVersion = System.getProperty("java.version");
    if (javaVersion.startsWith("1.5")) {
        return;
    }

    Metadata extracted = new Metadata();
    ContentHandler bodyHandler = new BodyContentHandler();
    Parser hdfParser = new HDFParser();

    // The sample is a publicly available HDF4 file from the HDF4 examples:
    // http://www.hdfgroup.org/training/hdf4_chunking/Chunkit/bin/input54kmdata.hdf
    try (InputStream hdfInput = HDFParser.class.getResourceAsStream("/test-documents/test.hdf")) {
        hdfParser.parse(hdfInput, bodyHandler, extracted, new ParseContext());
    }

    assertNotNull(extracted);
    assertEquals("Direct read of HDF4 file through CDM library", extracted.get("_History"));
    assertEquals("Ascending", extracted.get("Pass"));
    assertEquals("Hierarchical Data Format, version 4", extracted.get("File-Type-Description"));
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) HDFParser(org.apache.tika.parser.hdf.HDFParser) BodyContentHandler(org.apache.tika.sax.BodyContentHandler) ContentHandler(org.xml.sax.ContentHandler) Parser(org.apache.tika.parser.Parser) HDFParser(org.apache.tika.parser.hdf.HDFParser) Test(org.junit.Test)

Aggregations

BodyContentHandler (org.apache.tika.sax.BodyContentHandler)251 Metadata (org.apache.tika.metadata.Metadata)242 Test (org.junit.Test)213 ContentHandler (org.xml.sax.ContentHandler)202 InputStream (java.io.InputStream)189 ParseContext (org.apache.tika.parser.ParseContext)170 TikaTest (org.apache.tika.TikaTest)117 AutoDetectParser (org.apache.tika.parser.AutoDetectParser)87 Parser (org.apache.tika.parser.Parser)81 ByteArrayInputStream (java.io.ByteArrayInputStream)65 TikaInputStream (org.apache.tika.io.TikaInputStream)65 ExcelParserTest (org.apache.tika.parser.microsoft.ExcelParserTest)24 WordParserTest (org.apache.tika.parser.microsoft.WordParserTest)24 TikaException (org.apache.tika.exception.TikaException)23 IOException (java.io.IOException)17 OfficeParser (org.apache.tika.parser.microsoft.OfficeParser)15 EmptyParser (org.apache.tika.parser.EmptyParser)14 SAXException (org.xml.sax.SAXException)13 MediaType (org.apache.tika.mime.MediaType)10 XHTMLContentHandler (org.apache.tika.sax.XHTMLContentHandler)10