
Example 41 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

From the class TestVectorExpressionWriters, the method testStructLong.

private void testStructLong(TypeInfo type) throws HiveException {
    LongColumnVector icv = VectorizedRowGroupGenUtil.generateLongColumnVector(true, false, vectorSize, new Random(10));
    icv.isNull[3] = true;
    LongColumnVector bcv = VectorizedRowGroupGenUtil.generateLongColumnVector(true, false, vectorSize, new Random(10));
    bcv.isNull[2] = true;
    ArrayList<Object>[] values = (ArrayList<Object>[]) new ArrayList[this.vectorSize];
    StructObjectInspector soi = genStructOI();
    VectorExpressionWriter[] vew = VectorExpressionWriterFactory.getExpressionWriters(soi);
    for (int i = 0; i < vectorSize; i++) {
        values[i] = new ArrayList<Object>(2);
        values[i].add(null);
        values[i].add(null);
        vew[0].setValue(values[i], icv, i);
        vew[1].setValue(values[i], bcv, i);
        Object theInt = values[i].get(0);
        if (theInt == null) {
            Assert.assertTrue(icv.isNull[i]);
        } else {
            IntWritable w = (IntWritable) theInt;
            Assert.assertEquals((int) icv.vector[i], w.get());
        }
        Object theBool = values[i].get(1);
        if (theBool == null) {
            Assert.assertTrue(bcv.isNull[i]);
        } else {
            BooleanWritable w = (BooleanWritable) theBool;
            Assert.assertEquals(bcv.vector[i] != 0, w.get());
        }
    }
}
Also used : Random(java.util.Random) BooleanWritable(org.apache.hadoop.io.BooleanWritable) ArrayList(java.util.ArrayList) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) IntWritable(org.apache.hadoop.io.IntWritable) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
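
The genStructOI() helper called above is defined elsewhere in TestVectorExpressionWriters and is not shown on this page. A minimal stand-in sketch, assuming a two-field struct of int and boolean built with the standard ObjectInspectorFactory API (the field names are hypothetical):

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// Hypothetical stand-in for genStructOI(): a struct<a:int,b:boolean> inspector
// assembled from the standard writable primitive inspectors.
private StructObjectInspector genStructOI() {
    return ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("a", "b"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.writableIntObjectInspector,
            PrimitiveObjectInspectorFactory.writableBooleanObjectInspector));
}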

Example 42 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

From the class TestOrcStruct, the method testInspectorFromTypeInfo.

@Test
public void testInspectorFromTypeInfo() throws Exception {
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("struct<c1:boolean,c2:tinyint" + ",c3:smallint,c4:int,c5:bigint,c6:float,c7:double,c8:binary," + "c9:string,c10:struct<c1:int>,c11:map<int,int>,c12:uniontype<int>" + ",c13:array<timestamp>>");
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(typeInfo);
    assertEquals("struct<c1:boolean,c2:tinyint,c3:smallint,c4:int,c5:" + "bigint,c6:float,c7:double,c8:binary,c9:string,c10:struct<" + "c1:int>,c11:map<int,int>,c12:uniontype<int>,c13:array<timestamp>>", inspector.getTypeName());
    assertEquals(null, inspector.getAllStructFieldRefs().get(0).getFieldComment());
    assertEquals(null, inspector.getStructFieldRef("UNKNOWN"));
    OrcStruct s1 = new OrcStruct(13);
    for (int i = 0; i < 13; ++i) {
        s1.setFieldValue(i, i);
    }
    List<Object> list = new ArrayList<Object>();
    list.addAll(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12));
    assertEquals(list, inspector.getStructFieldsDataAsList(s1));
    ListObjectInspector listOI = (ListObjectInspector) inspector.getAllStructFieldRefs().get(12).getFieldObjectInspector();
    assertEquals(ObjectInspector.Category.LIST, listOI.getCategory());
    assertEquals(10, listOI.getListElement(list, 10));
    assertEquals(null, listOI.getListElement(list, -1));
    assertEquals(null, listOI.getListElement(list, 13));
    assertEquals(13, listOI.getListLength(list));
    Map<Integer, Integer> map = new HashMap<Integer, Integer>();
    map.put(1, 2);
    map.put(2, 4);
    map.put(3, 6);
    MapObjectInspector mapOI = (MapObjectInspector) inspector.getAllStructFieldRefs().get(10).getFieldObjectInspector();
    assertEquals(3, mapOI.getMapSize(map));
    assertEquals(4, mapOI.getMapValueElement(map, 2));
}
Also used : HashMap(java.util.HashMap) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) ArrayList(java.util.ArrayList) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)
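
OrcStruct.createObjectInspector is ORC-specific, but the same TypeInfo-driven pattern works with the generic serde2 factories. A minimal sketch, assuming the standard TypeInfoUtils methods:

import static org.junit.Assert.assertEquals;

import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.junit.Test;

// A generic (non-ORC) counterpart: parse a struct type string and obtain the
// standard Java struct inspector for it.
@Test
public void testStandardInspectorFromTypeString() {
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("struct<c1:boolean,c2:int>");
    StructObjectInspector soi =
        (StructObjectInspector) TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    assertEquals(2, soi.getAllStructFieldRefs().size());
    assertEquals("c1", soi.getAllStructFieldRefs().get(0).getFieldName());
}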

Example 43 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

From the class TestOrcRawRecordMerger, the method testNewBase.

@Test
public void testNewBase() throws Exception {
    Configuration conf = new Configuration();
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "col1");
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "string");
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
    Reader reader = Mockito.mock(Reader.class, settings);
    RecordReader recordReader = Mockito.mock(RecordReader.class, settings);
    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
    OrcProto.Type.Builder typeBuilder = OrcProto.Type.newBuilder();
    typeBuilder.setKind(OrcProto.Type.Kind.STRUCT).addSubtypes(1).addSubtypes(2).addSubtypes(3).addSubtypes(4).addSubtypes(5).addSubtypes(6);
    typeBuilder.addAllFieldNames(Lists.newArrayList("operation", "originalTransaction", "bucket", "rowId", "currentTransaction", "row"));
    types.add(typeBuilder.build());
    types.add(null);
    types.add(null);
    types.add(null);
    types.add(null);
    types.add(null);
    typeBuilder.clearSubtypes();
    typeBuilder.addSubtypes(7);
    typeBuilder.addAllFieldNames(Lists.newArrayList("col1"));
    types.add(typeBuilder.build());
    typeBuilder.clear();
    typeBuilder.setKind(OrcProto.Type.Kind.STRING);
    types.add(typeBuilder.build());
    Mockito.when(reader.getTypes()).thenReturn(types);
    Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class))).thenReturn(recordReader);
    OrcStruct row1 = new OrcStruct(OrcRecordUpdater.FIELDS);
    setRow(row1, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 20, 100, "first");
    OrcStruct row2 = new OrcStruct(OrcRecordUpdater.FIELDS);
    setRow(row2, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 30, 110, "second");
    OrcStruct row3 = new OrcStruct(OrcRecordUpdater.FIELDS);
    setRow(row3, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 40, 120, "third");
    OrcStruct row4 = new OrcStruct(OrcRecordUpdater.FIELDS);
    setRow(row4, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth");
    OrcStruct row5 = new OrcStruct(OrcRecordUpdater.FIELDS);
    setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth");
    Mockito.when(recordReader.hasNext()).thenReturn(true, true, true, true, true, false);
    Mockito.when(recordReader.getProgress()).thenReturn(1.0f);
    Mockito.when(recordReader.next(null)).thenReturn(row1, row4);
    Mockito.when(recordReader.next(row1)).thenReturn(row2);
    Mockito.when(recordReader.next(row2)).thenReturn(row3);
    Mockito.when(recordReader.next(row3)).thenReturn(row5);
    Mockito.when(reader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME)).thenReturn(ByteBuffer.wrap("10,20,30;40,50,60;40,50,61".getBytes("UTF-8")));
    Mockito.when(reader.getStripes()).thenReturn(createStripes(2, 2, 1));
    OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, false, reader, false, 10, createMaximalTxnList(), new Reader.Options().range(1000, 1000), null);
    RecordReader rr = merger.getCurrentReader().recordReader;
    assertEquals(0, merger.getOtherReaders().size());
    assertEquals(new RecordIdentifier(10, 20, 30), merger.getMinKey());
    assertEquals(new RecordIdentifier(40, 50, 60), merger.getMaxKey());
    RecordIdentifier id = merger.createKey();
    OrcStruct event = merger.createValue();
    assertEquals(true, merger.next(id, event));
    assertEquals(10, id.getTransactionId());
    assertEquals(20, id.getBucketId());
    assertEquals(40, id.getRowId());
    assertEquals("third", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(40, id.getTransactionId());
    assertEquals(50, id.getBucketId());
    assertEquals(60, id.getRowId());
    assertEquals("fourth", getValue(event));
    assertEquals(false, merger.next(id, event));
    assertEquals(1.0, merger.getProgress(), 0.01);
    merger.close();
    Mockito.verify(rr).close();
    Mockito.verify(rr).getProgress();
    StructObjectInspector eventObjectInspector = (StructObjectInspector) merger.getObjectInspector();
    List<? extends StructField> fields = eventObjectInspector.getAllStructFieldRefs();
    assertEquals(OrcRecordUpdater.FIELDS, fields.size());
    assertEquals("operation", fields.get(OrcRecordUpdater.OPERATION).getFieldName());
    assertEquals("currentTransaction", fields.get(OrcRecordUpdater.CURRENT_TRANSACTION).getFieldName());
    assertEquals("originalTransaction", fields.get(OrcRecordUpdater.ORIGINAL_TRANSACTION).getFieldName());
    assertEquals("bucket", fields.get(OrcRecordUpdater.BUCKET).getFieldName());
    assertEquals("rowId", fields.get(OrcRecordUpdater.ROW_ID).getFieldName());
    StructObjectInspector rowObjectInspector = (StructObjectInspector) fields.get(OrcRecordUpdater.ROW).getFieldObjectInspector();
    assertEquals("col1", rowObjectInspector.getAllStructFieldRefs().get(0).getFieldName());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) OrcProto(org.apache.orc.OrcProto) ArrayList(java.util.ArrayList) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)
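
The setRow(...) and getValue(...) helpers referenced above live elsewhere in TestOrcRawRecordMerger. A plausible sketch of setRow, assuming the public OrcRecordUpdater field-index constants and a single-column nested row (the parameter names are assumptions):

import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

// Fill the ACID event struct: operation, transaction ids, bucket, row id, and a
// nested one-column row holding the string value.
static void setRow(OrcStruct event, int operation, long originalTxn, int bucket,
                   long rowId, long currentTxn, String value) {
    event.setFieldValue(OrcRecordUpdater.OPERATION, new IntWritable(operation));
    event.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION, new LongWritable(originalTxn));
    event.setFieldValue(OrcRecordUpdater.BUCKET, new IntWritable(bucket));
    event.setFieldValue(OrcRecordUpdater.ROW_ID, new LongWritable(rowId));
    event.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION, new LongWritable(currentTxn));
    OrcStruct row = new OrcStruct(1);
    row.setFieldValue(0, new Text(value));
    event.setFieldValue(OrcRecordUpdater.ROW, row);
}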

Example 44 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

From the class TestParquetSerDe, the method deserializeAndSerializeLazySimple.

private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t) throws SerDeException {
    // Get the row structure
    final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    // Deserialize
    final Object row = serDe.deserialize(t);
    assertEquals("deserialization gives the wrong object class", row.getClass(), ArrayWritable.class);
    assertEquals("size correct after deserialization", serDe.getSerDeStats().getRawDataSize(), t.get().length);
    assertEquals("deserialization gives the wrong object", t, row);
    // Serialize
    final ParquetHiveRecord serializedArr = (ParquetHiveRecord) serDe.serialize(row, oi);
    assertEquals("size correct after serialization", serDe.getSerDeStats().getRawDataSize(), ((ArrayWritable) serializedArr.getObject()).get().length);
    assertTrue("serialized object should be equal to starting object", arrayWritableEquals(t, (ArrayWritable) serializedArr.getObject()));
}
Also used : ArrayWritable(org.apache.hadoop.io.ArrayWritable) ParquetHiveRecord(org.apache.hadoop.hive.serde2.io.ParquetHiveRecord) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
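
One way this helper could be driven (a hypothetical sketch, not the actual test setup): initialize the SerDe with a single int column via table properties and pass in a matching one-element ArrayWritable. The column name and property values here are assumptions.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

// Hypothetical driver: a ParquetHiveSerDe configured for one int column,
// round-tripping a single-element row through the helper above.
private void roundTripSingleIntColumn() throws SerDeException {
    ParquetHiveSerDe serDe = new ParquetHiveSerDe();
    Properties props = new Properties();
    props.setProperty("columns", "intCol");
    props.setProperty("columns.types", "int");
    serDe.initialize(new Configuration(), props);
    ArrayWritable row = new ArrayWritable(Writable.class, new Writable[] { new IntWritable(7) });
    deserializeAndSerializeLazySimple(serDe, row);
}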

Example 45 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

From the class TestParquetRowGroupFilter, the method testRowGroupFilterTakeEffect.

@Test
public void testRowGroupFilterTakeEffect() throws Exception {
    // define schema
    columnNames = "intCol";
    columnTypes = "int";
    StructObjectInspector inspector = getObjectInspector(columnNames, columnTypes);
    MessageType fileSchema = MessageTypeParser.parseMessageType("message hive_schema {\n" + "  optional int32 intCol;\n" + "}\n");
    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "intCol");
    conf.set("columns", "intCol");
    conf.set("columns.types", "int");
    // create Parquet file with specific data
    Path testPath = writeDirect("RowGroupFilterTakeEffect", fileSchema, new DirectWriter() {

        @Override
        public void write(RecordConsumer consumer) {
            for (int i = 0; i < 100; i++) {
                consumer.startMessage();
                consumer.startField("int", 0);
                consumer.addInteger(i);
                consumer.endField("int", 0);
                consumer.endMessage();
            }
        }
    });
    // > 50
    GenericUDF udf = new GenericUDFOPGreaterThan();
    List<ExprNodeDesc> children = Lists.newArrayList();
    ExprNodeColumnDesc columnDesc = new ExprNodeColumnDesc(Integer.class, "intCol", "T", false);
    ExprNodeConstantDesc constantDesc = new ExprNodeConstantDesc(50);
    children.add(columnDesc);
    children.add(constantDesc);
    ExprNodeGenericFuncDesc genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
    String searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
    ParquetRecordReaderWrapper recordReader = (ParquetRecordReaderWrapper) new MapredParquetInputFormat().getRecordReader(new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
    Assert.assertEquals("row group is not filtered correctly", 1, recordReader.getFiltedBlocks().size());
    // > 100
    constantDesc = new ExprNodeConstantDesc(100);
    children.set(1, constantDesc);
    genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
    searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
    recordReader = (ParquetRecordReaderWrapper) new MapredParquetInputFormat().getRecordReader(new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
    Assert.assertEquals("row group is not filtered correctly", 0, recordReader.getFiltedBlocks().size());
}
Also used : Path(org.apache.hadoop.fs.Path) GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ParquetRecordReaderWrapper(org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) FileSplit(org.apache.hadoop.mapred.FileSplit) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) MessageType(org.apache.parquet.schema.MessageType) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)
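
The getObjectInspector(columnNames, columnTypes) helper used at the top of this test is defined elsewhere in TestParquetRowGroupFilter. A minimal stand-in, assuming comma-separated column names and a type string parsed by TypeInfoUtils; returning the standard Java inspector is an assumption here, as the real test may rely on a Parquet-specific inspector.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

// Hypothetical stand-in: build a struct TypeInfo from the name/type strings and
// derive a StructObjectInspector from it.
private StructObjectInspector getObjectInspector(String columnNames, String columnTypes) {
    List<String> names = Arrays.asList(columnNames.split(","));
    List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString(columnTypes);
    TypeInfo rowType = TypeInfoFactory.getStructTypeInfo(names, types);
    return (StructObjectInspector) TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowType);
}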

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 232
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 113
ArrayList (java.util.ArrayList) 84
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField) 69
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) 46
SerDeException (org.apache.hadoop.hive.serde2.SerDeException) 42
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) 42
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) 40
Test (org.junit.Test) 38
Properties (java.util.Properties) 35
Text (org.apache.hadoop.io.Text) 32
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) 30
Path (org.apache.hadoop.fs.Path) 29
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 27
IOException (java.io.IOException) 25
Configuration (org.apache.hadoop.conf.Configuration) 25
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) 24
LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) 24
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 23
InputSplit (org.apache.hadoop.mapred.InputSplit) 23