Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by Apache.
From the class TestVectorExpressionWriters, method testStructLong.
private void testStructLong(TypeInfo type) throws HiveException {
  // Two long column vectors backing the struct's fields; mark one entry null in each.
  LongColumnVector icv = VectorizedRowGroupGenUtil.generateLongColumnVector(
      true, false, vectorSize, new Random(10));
  icv.isNull[3] = true;
  LongColumnVector bcv = VectorizedRowGroupGenUtil.generateLongColumnVector(
      true, false, vectorSize, new Random(10));
  bcv.isNull[2] = true;
  ArrayList<Object>[] values = (ArrayList<Object>[]) new ArrayList[this.vectorSize];
  // One vector expression writer per struct field, derived from the struct object inspector.
  StructObjectInspector soi = genStructOI();
  VectorExpressionWriter[] vew = VectorExpressionWriterFactory.getExpressionWriters(soi);
  for (int i = 0; i < vectorSize; i++) {
    values[i] = new ArrayList<Object>(2);
    values[i].add(null);
    values[i].add(null);
    vew[0].setValue(values[i], icv, i);
    vew[1].setValue(values[i], bcv, i);
    Object theInt = values[i].get(0);
    if (theInt == null) {
      Assert.assertTrue(icv.isNull[i]);
    } else {
      IntWritable w = (IntWritable) theInt;
      Assert.assertEquals((int) icv.vector[i], w.get());
    }
    Object theBool = values[i].get(1);
    if (theBool == null) {
      Assert.assertTrue(bcv.isNull[i]);
    } else {
      BooleanWritable w = (BooleanWritable) theBool;
      Assert.assertEquals(bcv.vector[i] == 0 ? false : true, w.get());
    }
  }
}
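The genStructOI() helper is not shown in this snippet. A minimal sketch of how such an inspector could be assembled, assuming a two-field struct of int and boolean built with ObjectInspectorFactory and PrimitiveObjectInspectorFactory (from org.apache.hadoop.hive.serde2.objectinspector and its primitive subpackage); the field names below are purely illustrative:

  // Hypothetical reconstruction of genStructOI(): a struct<field1:int, field2:boolean>
  // inspector, matching the IntWritable/BooleanWritable casts in the assertions above.
  private StructObjectInspector genStructOI() {
    List<String> names = Arrays.asList("field1", "field2");
    List<ObjectInspector> ois = Arrays.asList(
        (ObjectInspector) PrimitiveObjectInspectorFactory.writableIntObjectInspector,
        (ObjectInspector) PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
    return ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);
  }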
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by Apache.
From the class TestOrcStruct, method testInspectorFromTypeInfo.
@Test
public void testInspectorFromTypeInfo() throws Exception {
  TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(
      "struct<c1:boolean,c2:tinyint,c3:smallint,c4:int,c5:bigint,c6:float,c7:double,c8:binary,"
          + "c9:string,c10:struct<c1:int>,c11:map<int,int>,c12:uniontype<int>,c13:array<timestamp>>");
  StructObjectInspector inspector =
      (StructObjectInspector) OrcStruct.createObjectInspector(typeInfo);
  assertEquals("struct<c1:boolean,c2:tinyint,c3:smallint,c4:int,c5:bigint,c6:float,c7:double,c8:binary,"
      + "c9:string,c10:struct<c1:int>,c11:map<int,int>,c12:uniontype<int>,c13:array<timestamp>>",
      inspector.getTypeName());
  assertEquals(null, inspector.getAllStructFieldRefs().get(0).getFieldComment());
  assertEquals(null, inspector.getStructFieldRef("UNKNOWN"));
  // Populate a 13-field OrcStruct with the values 0..12.
  OrcStruct s1 = new OrcStruct(13);
  for (int i = 0; i < 13; ++i) {
    s1.setFieldValue(i, i);
  }
  List<Object> list = new ArrayList<Object>();
  list.addAll(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12));
  assertEquals(list, inspector.getStructFieldsDataAsList(s1));
  // Field 12 (c13) is array<timestamp>; its list inspector indexes a plain Java list.
  ListObjectInspector listOI =
      (ListObjectInspector) inspector.getAllStructFieldRefs().get(12).getFieldObjectInspector();
  assertEquals(ObjectInspector.Category.LIST, listOI.getCategory());
  assertEquals(10, listOI.getListElement(list, 10));
  assertEquals(null, listOI.getListElement(list, -1));
  assertEquals(null, listOI.getListElement(list, 13));
  assertEquals(13, listOI.getListLength(list));
  // Field 10 (c11) is map<int,int>.
  Map<Integer, Integer> map = new HashMap<Integer, Integer>();
  map.put(1, 2);
  map.put(2, 4);
  map.put(3, 6);
  MapObjectInspector mapOI =
      (MapObjectInspector) inspector.getAllStructFieldRefs().get(10).getFieldObjectInspector();
  assertEquals(3, mapOI.getMapSize(map));
  assertEquals(4, mapOI.getMapValueElement(map, 2));
}
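As a small follow-on sketch (not part of the original test), individual fields can also be read back one at a time through the same inspector; this reuses the s1 and inspector variables from the method above:

  // Read a single field back through the struct inspector (c4 was set to 3 above).
  StructField c4 = inspector.getStructFieldRef("c4");
  assertEquals(3, inspector.getStructFieldData(s1, c4));
  assertEquals(ObjectInspector.Category.STRUCT, inspector.getCategory());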
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by Apache.
From the class TestOrcRawRecordMerger, method testNewBase.
@Test
public void testNewBase() throws Exception {
  Configuration conf = new Configuration();
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "col1");
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "string");
  HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
  Reader reader = Mockito.mock(Reader.class, settings);
  RecordReader recordReader = Mockito.mock(RecordReader.class, settings);
  // Build the ACID event schema: a struct of (operation, originalTransaction, bucket,
  // rowId, currentTransaction, row), where row is a struct<col1:string>.
  List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
  OrcProto.Type.Builder typeBuilder = OrcProto.Type.newBuilder();
  typeBuilder.setKind(OrcProto.Type.Kind.STRUCT)
      .addSubtypes(1).addSubtypes(2).addSubtypes(3)
      .addSubtypes(4).addSubtypes(5).addSubtypes(6);
  typeBuilder.addAllFieldNames(Lists.newArrayList(
      "operation", "originalTransaction", "bucket", "rowId", "currentTransaction", "row"));
  types.add(typeBuilder.build());
  types.add(null);
  types.add(null);
  types.add(null);
  types.add(null);
  types.add(null);
  typeBuilder.clearSubtypes();
  typeBuilder.addSubtypes(7);
  typeBuilder.addAllFieldNames(Lists.newArrayList("col1"));
  types.add(typeBuilder.build());
  typeBuilder.clear();
  typeBuilder.setKind(OrcProto.Type.Kind.STRING);
  types.add(typeBuilder.build());
  Mockito.when(reader.getTypes()).thenReturn(types);
  Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class))).thenReturn(recordReader);
  // Five insert events across two transactions (originalTransaction 10 and 40).
  OrcStruct row1 = new OrcStruct(OrcRecordUpdater.FIELDS);
  setRow(row1, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 20, 100, "first");
  OrcStruct row2 = new OrcStruct(OrcRecordUpdater.FIELDS);
  setRow(row2, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 30, 110, "second");
  OrcStruct row3 = new OrcStruct(OrcRecordUpdater.FIELDS);
  setRow(row3, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 40, 120, "third");
  OrcStruct row4 = new OrcStruct(OrcRecordUpdater.FIELDS);
  setRow(row4, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth");
  OrcStruct row5 = new OrcStruct(OrcRecordUpdater.FIELDS);
  setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth");
  // Each next(prev) stub recycles the previous value object and returns the following row.
  Mockito.when(recordReader.hasNext()).thenReturn(true, true, true, true, true, false);
  Mockito.when(recordReader.getProgress()).thenReturn(1.0f);
  Mockito.when(recordReader.next(null)).thenReturn(row1, row4);
  Mockito.when(recordReader.next(row1)).thenReturn(row2);
  Mockito.when(recordReader.next(row2)).thenReturn(row3);
  Mockito.when(recordReader.next(row3)).thenReturn(row5);
  Mockito.when(reader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME))
      .thenReturn(ByteBuffer.wrap("10,20,30;40,50,60;40,50,61".getBytes("UTF-8")));
  Mockito.when(reader.getStripes()).thenReturn(createStripes(2, 2, 1));
  OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, false, reader, false, 10,
      createMaximalTxnList(), new Reader.Options().range(1000, 1000), null);
  RecordReader rr = merger.getCurrentReader().recordReader;
  assertEquals(0, merger.getOtherReaders().size());
  // Min and max keys for the requested byte range are derived from the ACID key index.
  assertEquals(new RecordIdentifier(10, 20, 30), merger.getMinKey());
  assertEquals(new RecordIdentifier(40, 50, 60), merger.getMaxKey());
  RecordIdentifier id = merger.createKey();
  OrcStruct event = merger.createValue();
  assertEquals(true, merger.next(id, event));
  assertEquals(10, id.getTransactionId());
  assertEquals(20, id.getBucketId());
  assertEquals(40, id.getRowId());
  assertEquals("third", getValue(event));
  assertEquals(true, merger.next(id, event));
  assertEquals(40, id.getTransactionId());
  assertEquals(50, id.getBucketId());
  assertEquals(60, id.getRowId());
  assertEquals("fourth", getValue(event));
  assertEquals(false, merger.next(id, event));
  assertEquals(1.0, merger.getProgress(), 0.01);
  merger.close();
  Mockito.verify(rr).close();
  Mockito.verify(rr).getProgress();
  // The merger's object inspector exposes the ACID metadata columns plus the row struct.
  StructObjectInspector eventObjectInspector = (StructObjectInspector) merger.getObjectInspector();
  List<? extends StructField> fields = eventObjectInspector.getAllStructFieldRefs();
  assertEquals(OrcRecordUpdater.FIELDS, fields.size());
  assertEquals("operation", fields.get(OrcRecordUpdater.OPERATION).getFieldName());
  assertEquals("currentTransaction", fields.get(OrcRecordUpdater.CURRENT_TRANSACTION).getFieldName());
  assertEquals("originalTransaction", fields.get(OrcRecordUpdater.ORIGINAL_TRANSACTION).getFieldName());
  assertEquals("bucket", fields.get(OrcRecordUpdater.BUCKET).getFieldName());
  assertEquals("rowId", fields.get(OrcRecordUpdater.ROW_ID).getFieldName());
  StructObjectInspector rowObjectInspector =
      (StructObjectInspector) fields.get(OrcRecordUpdater.ROW).getFieldObjectInspector();
  assertEquals("col1", rowObjectInspector.getAllStructFieldRefs().get(0).getFieldName());
}
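The helpers setRow, getValue, createStripes, and createMaximalTxnList are defined elsewhere in TestOrcRawRecordMerger and are not shown here. A plausible sketch of setRow, assuming the standard ACID event layout used by OrcRecordUpdater and the Hadoop writables IntWritable, LongWritable, and Text from org.apache.hadoop.io (the real helper may differ in detail):

  // Hedged reconstruction: populate an ACID event struct with Hadoop writables.
  // The argument order mirrors the calls above:
  // (operation, originalTransaction, bucket, rowId, currentTransaction, value).
  static void setRow(OrcStruct event, int operation, long originalTransaction,
      int bucket, long rowId, long currentTransaction, String value) {
    event.setFieldValue(OrcRecordUpdater.OPERATION, new IntWritable(operation));
    event.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION, new LongWritable(originalTransaction));
    event.setFieldValue(OrcRecordUpdater.BUCKET, new IntWritable(bucket));
    event.setFieldValue(OrcRecordUpdater.ROW_ID, new LongWritable(rowId));
    event.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION, new LongWritable(currentTransaction));
    OrcStruct row = new OrcStruct(1);
    row.setFieldValue(0, new Text(value));
    event.setFieldValue(OrcRecordUpdater.ROW, row);
  }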
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by Apache.
From the class TestParquetSerDe, method deserializeAndSerializeLazySimple.
private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe,
    final ArrayWritable t) throws SerDeException {
  // Get the row structure
  final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
  // Deserialize
  final Object row = serDe.deserialize(t);
  assertEquals("deserialization gives the wrong object class", row.getClass(), ArrayWritable.class);
  assertEquals("size correct after deserialization",
      serDe.getSerDeStats().getRawDataSize(), t.get().length);
  assertEquals("deserialization gives the wrong object", t, row);
  // Serialize
  final ParquetHiveRecord serializedArr = (ParquetHiveRecord) serDe.serialize(row, oi);
  assertEquals("size correct after serialization",
      serDe.getSerDeStats().getRawDataSize(),
      ((ArrayWritable) serializedArr.getObject()).get().length);
  assertTrue("serialized object should be equal to starting object",
      arrayWritableEquals(t, (ArrayWritable) serializedArr.getObject()));
}
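The serDe passed to this method is expected to be initialized before it is called. A minimal caller sketch, assuming the usual initialize(Configuration, Properties) entry point and the "columns"/"columns.types" table properties; the column names, types, and the ArrayWritable t below are illustrative, not taken from the original test:

  // Assumed setup, not shown in the original snippet.
  ParquetHiveSerDe serDe = new ParquetHiveSerDe();
  Properties tbl = new Properties();
  tbl.setProperty("columns", "c1,c2");
  tbl.setProperty("columns.types", "int:string");
  serDe.initialize(new Configuration(), tbl);
  // t is an ArrayWritable whose layout matches the declared columns.
  deserializeAndSerializeLazySimple(serDe, t);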
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by Apache.
From the class TestParquetRowGroupFilter, method testRowGroupFilterTakeEffect.
@Test
public void testRowGroupFilterTakeEffect() throws Exception {
  // define schema
  columnNames = "intCol";
  columnTypes = "int";
  StructObjectInspector inspector = getObjectInspector(columnNames, columnTypes);
  MessageType fileSchema = MessageTypeParser.parseMessageType(
      "message hive_schema {\n" + " optional int32 intCol;\n" + "}\n");
  conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "intCol");
  conf.set("columns", "intCol");
  conf.set("columns.types", "int");
  // create a Parquet file containing the values 0..99 in a single column
  Path testPath = writeDirect("RowGroupFilterTakeEffect", fileSchema, new DirectWriter() {
    @Override
    public void write(RecordConsumer consumer) {
      for (int i = 0; i < 100; i++) {
        consumer.startMessage();
        consumer.startField("int", 0);
        consumer.addInteger(i);
        consumer.endField("int", 0);
        consumer.endMessage();
      }
    }
  });
  // Predicate intCol > 50: the row group may contain matches, so one block survives filtering.
  GenericUDF udf = new GenericUDFOPGreaterThan();
  List<ExprNodeDesc> children = Lists.newArrayList();
  ExprNodeColumnDesc columnDesc = new ExprNodeColumnDesc(Integer.class, "intCol", "T", false);
  ExprNodeConstantDesc constantDesc = new ExprNodeConstantDesc(50);
  children.add(columnDesc);
  children.add(constantDesc);
  ExprNodeGenericFuncDesc genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
  String searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
  conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
  ParquetRecordReaderWrapper recordReader = (ParquetRecordReaderWrapper)
      new MapredParquetInputFormat().getRecordReader(
          new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
  Assert.assertEquals("row group is not filtered correctly", 1, recordReader.getFiltedBlocks().size());
  // Predicate intCol > 100: no value can match, so the row group is eliminated.
  constantDesc = new ExprNodeConstantDesc(100);
  children.set(1, constantDesc);
  genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
  searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
  conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
  recordReader = (ParquetRecordReaderWrapper)
      new MapredParquetInputFormat().getRecordReader(
          new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
  Assert.assertEquals("row group is not filtered correctly", 0, recordReader.getFiltedBlocks().size());
}
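The getObjectInspector helper, along with the columnNames/columnTypes fields, conf, writeDirect, and fileLength, comes from the surrounding test class and is not shown. A minimal sketch of how such an inspector could be derived from the comma-separated name and colon-separated type strings, assuming the generic TypeInfoUtils route (the actual helper may instead construct a Parquet-specific inspector):

  // Hedged sketch: build a struct object inspector from "name,name" / "type:type" strings.
  private StructObjectInspector getObjectInspector(String columnNames, String columnTypes) {
    List<String> names = Arrays.asList(columnNames.split(","));
    List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString(columnTypes);
    TypeInfo rowType = TypeInfoFactory.getStructTypeInfo(names, types);
    return (StructObjectInspector)
        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowType);
  }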