Example 86 with ObjectInspectorFactory.getReflectionObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

From the class TestStatsSerde, method testLazyBinarySerDe.

/**
 * Test LazyBinarySerDe
 */
public void testLazyBinarySerDe() throws Throwable {
    try {
        System.out.println("test: testLazyBinarySerDe");
        int num = 1000;
        Random r = new Random(1234);
        MyTestClass[] rows = new MyTestClass[num];
        for (int i = 0; i < num; i++) {
            MyTestClass t = new MyTestClass();
            ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
            t.randomFill(r, extraTypeInfo);
            rows[i] = t;
        }
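        // Derive the column names and types from a reflection-based inspector over MyTestClass.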
        StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
        String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
        String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);
        schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);
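        // Initialize the serde with that schema and round-trip the rows through it.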
        LazyBinarySerDe serDe = new LazyBinarySerDe();
        SerDeUtils.initializeSerDe(serDe, new Configuration(), schema, null);
        deserializeAndSerializeLazyBinary(serDe, rows, rowOI);
        System.out.println("test: testLazyBinarySerDe - OK");
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used: Random (java.util.Random), Configuration (org.apache.hadoop.conf.Configuration), MyTestClass (org.apache.hadoop.hive.serde2.binarysortable.MyTestClass), ExtraTypeInfo (org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo), LazyBinarySerDe (org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe), Properties (java.util.Properties), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
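
For orientation, here is a minimal, self-contained sketch (not taken from the Hive test suite) of what getReflectionObjectInspector does on its own: given a plain Java class, it returns a StructObjectInspector whose struct fields mirror the class's fields. The Point POJO and the ReflectionOISketch class name are hypothetical, introduced only for illustration:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class ReflectionOISketch {

    // Hypothetical POJO; each field becomes one struct field of the inspector.
    public static class Point {
        public int x;
        public int y;
    }

    public static void main(String[] args) {
        StructObjectInspector oi = (StructObjectInspector)
                ObjectInspectorFactory.getReflectionObjectInspector(
                        Point.class, ObjectInspectorOptions.JAVA);
        // List the struct fields derived from Point's fields.
        for (StructField f : oi.getAllStructFieldRefs()) {
            System.out.println(f.getFieldName() + " : "
                    + f.getFieldObjectInspector().getTypeName());
        }
        // Expected output (field order should follow declaration order):
        // x : int
        // y : int
    }
}

The factory caches inspectors internally, so repeated calls for the same class are cheap; the examples on this page then feed the resulting inspector's field names and types into a serde's column properties.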

Example 87 with ObjectInspectorFactory.getReflectionObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

From the class TestLazyBinaryColumnarSerDe, method testHandlingAlteredSchemas.

/**
 * HIVE-5788
 * <p>
 * Background: in cases of "add column", table metadata changes but the data does not. Columns
 * that are missing from the data but required by the metadata are interpreted as null.
 * <p>
 * This tests the use case of altering the columns of a table that already contains data, then adding
 * more data in the new schema, and verifying that this serde can read both kinds of data from the
 * resulting table.
 * @throws SerDeException
 */
public void testHandlingAlteredSchemas() throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(BeforeStruct.class, ObjectInspectorOptions.JAVA);
    String cols = ObjectInspectorUtils.getFieldNames(oi);
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    // serialize some data in the schema before it is altered.
    LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
    SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);
    BeforeStruct bs1 = new BeforeStruct();
    bs1.l1 = 1L;
    bs1.l2 = 2L;
    BytesRefArrayWritable braw1 = (BytesRefArrayWritable) serde.serialize(bs1, oi);
    // alter table add column: change the metadata
    oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(AfterStruct.class, ObjectInspectorOptions.JAVA);
    cols = ObjectInspectorUtils.getFieldNames(oi);
    props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    serde = new LazyBinaryColumnarSerDe();
    SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);
    // serialize some data in the schema after it is altered.
    AfterStruct as = new AfterStruct();
    as.l1 = 11L;
    as.l2 = 12L;
    as.l3 = 13L;
    BytesRefArrayWritable braw2 = (BytesRefArrayWritable) serde.serialize(as, oi);
    // fetch operator
    serde = new LazyBinaryColumnarSerDe();
    SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);
    // fetch the row inserted before schema is altered and verify
    LazyBinaryColumnarStruct struct1 = (LazyBinaryColumnarStruct) serde.deserialize(braw1);
    oi = (StructObjectInspector) serde.getObjectInspector();
    List<Object> objs1 = oi.getStructFieldsDataAsList(struct1);
    Assert.assertEquals(((LongWritable) objs1.get(0)).get(), 1L);
    Assert.assertEquals(((LongWritable) objs1.get(1)).get(), 2L);
    Assert.assertNull(objs1.get(2));
    // fetch the row inserted after schema is altered and verify
    LazyBinaryColumnarStruct struct2 = (LazyBinaryColumnarStruct) serde.deserialize(braw2);
    List<Object> objs2 = struct2.getFieldsAsList();
    Assert.assertEquals(((LongWritable) objs2.get(0)).get(), 11L);
    Assert.assertEquals(((LongWritable) objs2.get(1)).get(), 12L);
    Assert.assertEquals(((LongWritable) objs2.get(2)).get(), 13L);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), Properties (java.util.Properties), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
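
The design choice the HIVE-5788 fix illustrates: when the metadata lists more columns than a serialized row actually contains, LazyBinaryColumnarSerDe treats the missing trailing columns as null rather than failing, so an "add column" DDL only touches metadata, and rows written under the old schema stay readable without rewriting any files.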

Example 88 with ObjectInspectorFactory.getReflectionObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

From the class TestInputOutputFormat, method testNonVectorReaderNoFooterSerialize.

@Test
public void testNonVectorReaderNoFooterSerialize() throws Exception {
    MockFileSystem fs = new MockFileSystem(conf);
    MockPath mockPath = new MockPath(fs, "mock:///mocktable1");
    conf.set("hive.orc.splits.include.file.footer", "false");
    conf.set("mapred.input.dir", mockPath.toString());
    conf.set("fs.defaultFS", "mock:///");
    conf.set("fs.mock.impl", MockFileSystem.class.getName());
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    OrcInputFormat orcInputFormat = new OrcInputFormat();
    InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    int readOpsBefore = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
    for (InputSplit split : splits) {
        assertTrue("OrcSplit is expected", split instanceof OrcSplit);
        // ETL strategies will have start=3 (start of first stripe)
        assertTrue(split.toString().contains("start=3"));
        assertTrue(split.toString().contains("hasFooter=false"));
        assertTrue(split.toString().contains("hasBase=true"));
        assertTrue(split.toString().contains("deltas=0"));
        if (split instanceof OrcSplit) {
            assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter());
        }
        orcInputFormat.getRecordReader(split, conf, null);
    }
    int readOpsDelta = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: open to read footer - split 1 => mock:/mocktable1/0_0
    // call-2: open to read data - split 1 => mock:/mocktable1/0_0
    // call-3: open to read footer - split 2 => mock:/mocktable1/0_1
    // call-4: open to read data - split 2 => mock:/mocktable1/0_1
    assertEquals(4, readOpsDelta);
    // revert back to local fs
    conf.set("fs.defaultFS", "file:///");
}
Also used: InputSplit (org.apache.hadoop.mapred.InputSplit), RecordWriter (org.apache.hadoop.mapred.RecordWriter), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
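
As the call-by-call comments above spell out, with hive.orc.splits.include.file.footer set to false each split costs two file opens at read time, one to re-read the ORC footer and one to read the data, for four read ops across the two splits. Example 89 below runs the same workload with footer serialization turned on for comparison.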

Example 89 with ObjectInspectorFactory.getReflectionObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

From the class TestInputOutputFormat, method testVectorReaderFooterSerialize.

@Test
public void testVectorReaderFooterSerialize() throws Exception {
    MockFileSystem fs = new MockFileSystem(conf);
    MockPath mockPath = new MockPath(fs, "mock:///mocktable4");
    conf.set("hive.orc.splits.include.file.footer", "true");
    conf.set("mapred.input.dir", mockPath.toString());
    conf.set("fs.defaultFS", "mock:///");
    conf.set("fs.mock.impl", MockFileSystem.class.getName());
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    JobConf jobConf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "mocktable4", inspector, true, 0);
    Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    OrcInputFormat orcInputFormat = new OrcInputFormat();
    InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    int readOpsBefore = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
    for (InputSplit split : splits) {
        assertTrue("OrcSplit is expected", split instanceof OrcSplit);
        // ETL strategies will have start=3 (start of first stripe)
        assertTrue(split.toString().contains("start=3"));
        assertTrue(split.toString().contains("hasFooter=true"));
        assertTrue(split.toString().contains("hasBase=true"));
        assertTrue(split.toString().contains("deltas=0"));
        if (split instanceof OrcSplit) {
            assertTrue("Footer serialize test for vector reader, hasFooter is expected in" + " orc splits.", ((OrcSplit) split).hasFooter());
        }
        orcInputFormat.getRecordReader(split, jobConf, Reporter.NULL);
    }
    int readOpsDelta = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: open to read data - split 1 => mock:/mocktable4/0_0
    // call-2: open to read data - split 2 => mock:/mocktable4/0_1
    assertEquals(2, readOpsDelta);
    // revert back to local fs
    conf.set("fs.defaultFS", "file:///");
}
Also used: JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), RecordWriter (org.apache.hadoop.mapred.RecordWriter), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
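
Here the footer travels inside the serialized OrcSplit (hasFooter=true), so the record reader skips the footer re-read and only the data open remains: two read ops instead of the four seen in Example 88.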

Example 90 with ObjectInspectorFactory.getReflectionObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

From the class TestInputOutputFormat, method testACIDReaderFooterSerialize.

@Test
public void testACIDReaderFooterSerialize() throws Exception {
    MockFileSystem fs = new MockFileSystem(conf);
    MockPath mockPath = new MockPath(fs, "mock:///mocktable6");
    conf.set(ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, "true");
    conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
    conf.set("hive.orc.splits.include.file.footer", "true");
    conf.set("mapred.input.dir", mockPath.toString());
    conf.set("fs.defaultFS", "mock:///");
    conf.set("fs.mock.impl", MockFileSystem.class.getName());
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    OrcInputFormat orcInputFormat = new OrcInputFormat();
    InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    int readOpsBefore = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsBefore = statistics.getReadOps();
        }
    }
    assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
    for (InputSplit split : splits) {
        assertTrue("OrcSplit is expected", split instanceof OrcSplit);
        // ETL strategies will have start=3 (start of first stripe)
        assertTrue(split.toString().contains("start=3"));
        assertTrue(split.toString().contains("hasFooter=true"));
        assertTrue(split.toString().contains("hasBase=true"));
        assertTrue(split.toString().contains("deltas=0"));
        assertTrue(split.toString().contains("isOriginal=true"));
        if (split instanceof OrcSplit) {
            assertTrue("Footer serialize test for ACID reader, hasFooter is expected in" + " orc splits.", ((OrcSplit) split).hasFooter());
        }
        orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
    }
    int readOpsDelta = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOpsDelta = statistics.getReadOps() - readOpsBefore;
        }
    }
    // call-1: open to read data - split 1 => mock:/mocktable6/0_0
    // call-2: AcidUtils.getAcidState - split 1 => ls mock:/mocktable6
    // call-3: open to read data - split 2 => mock:/mocktable6/0_1
    // call-4: AcidUtils.getAcidState - split 2 => ls mock:/mocktable6
    // call-5: read footer - split 2 => mock:/mocktable6/0_0 (to get offset since it's original file)
    // call-6: file stat - split 2 => mock:/mocktable6/0_0
    assertEquals(6, readOpsDelta);
    // revert back to local fs
    conf.set("fs.defaultFS", "file:///");
}
Also used: InputSplit (org.apache.hadoop.mapred.InputSplit), RecordWriter (org.apache.hadoop.mapred.RecordWriter), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
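
The ACID read path adds its own overhead on top of the serialized footers: as the comments above note, each split triggers an AcidUtils.getAcidState directory listing, and the original-layout file needs an extra footer read plus a file stat to compute row offsets, which is how the delta reaches six read ops.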

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 73
Test (org.junit.Test): 64
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 60
Configuration (org.apache.hadoop.conf.Configuration): 25
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector): 25
InputSplit (org.apache.hadoop.mapred.InputSplit): 25
BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector): 24
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 23
Properties (java.util.Properties): 20
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 20
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 18
BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector): 18
ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector): 18
DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector): 18
FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector): 18
HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector): 18
LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector): 18
ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector): 18
TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector): 18
RecordWriter (org.apache.hadoop.mapred.RecordWriter): 18