Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class VectorizedBatchUtil, method setVector.
private static void setVector(Object row, StructObjectInspector oi, StructField field, VectorizedRowBatch batch, DataOutputBuffer buffer, int rowIndex, int colIndex, int offset) throws HiveException {
Object fieldData = oi.getStructFieldData(row, field);
ObjectInspector foi = field.getFieldObjectInspector();
// Vectorization only supports PRIMITIVE data types. Assert the same
assert (foi.getCategory() == Category.PRIMITIVE);
// Get writable object
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
Object writableCol = poi.getPrimitiveWritableObject(fieldData);
// NOTE: The default value for a null field is 1 for integer-backed columns and NaN for float/double columns. String types have no default value for null.
switch(poi.getPrimitiveCategory()) {
case BOOLEAN:
{
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case BYTE:
{
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case SHORT:
{
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case INT:
{
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case LONG:
{
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case DATE:
{
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.vector[rowIndex] = ((DateWritableV2) writableCol).getDays();
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case FLOAT:
{
DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
dcv.isNull[rowIndex] = false;
} else {
dcv.vector[rowIndex] = Double.NaN;
setNullColIsNullValue(dcv, rowIndex);
}
}
break;
case DOUBLE:
{
DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
dcv.isNull[rowIndex] = false;
} else {
dcv.vector[rowIndex] = Double.NaN;
setNullColIsNullValue(dcv, rowIndex);
}
}
break;
case TIMESTAMP:
{
TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.set(rowIndex, ((TimestampWritableV2) writableCol).getTimestamp().toSqlTimestamp());
lcv.isNull[rowIndex] = false;
} else {
lcv.setNullValue(rowIndex);
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case INTERVAL_YEAR_MONTH:
{
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
HiveIntervalYearMonth i = ((HiveIntervalYearMonthWritable) writableCol).getHiveIntervalYearMonth();
lcv.vector[rowIndex] = i.getTotalMonths();
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case INTERVAL_DAY_TIME:
{
IntervalDayTimeColumnVector icv = (IntervalDayTimeColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
HiveIntervalDayTime idt = ((HiveIntervalDayTimeWritable) writableCol).getHiveIntervalDayTime();
icv.set(rowIndex, idt);
icv.isNull[rowIndex] = false;
} else {
icv.setNullValue(rowIndex);
setNullColIsNullValue(icv, rowIndex);
}
}
break;
case BINARY:
{
BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
bcv.isNull[rowIndex] = false;
BytesWritable bw = (BytesWritable) writableCol;
byte[] bytes = bw.getBytes();
int start = buffer.getLength();
int length = bw.getLength();
try {
buffer.write(bytes, 0, length);
} catch (IOException ioe) {
throw new IllegalStateException("bad write", ioe);
}
bcv.setRef(rowIndex, buffer.getData(), start, length);
} else {
setNullColIsNullValue(bcv, rowIndex);
}
}
break;
case STRING:
{
BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
bcv.isNull[rowIndex] = false;
Text colText = (Text) writableCol;
int start = buffer.getLength();
int length = colText.getLength();
try {
buffer.write(colText.getBytes(), 0, length);
} catch (IOException ioe) {
throw new IllegalStateException("bad write", ioe);
}
bcv.setRef(rowIndex, buffer.getData(), start, length);
} else {
setNullColIsNullValue(bcv, rowIndex);
}
}
break;
case CHAR:
{
BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
bcv.isNull[rowIndex] = false;
HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
byte[] bytes = colHiveChar.getStrippedValue().getBytes();
// We assume the CHAR maximum length was enforced when the object was created.
int length = bytes.length;
int start = buffer.getLength();
try {
// In vector mode, we store CHAR as unpadded.
buffer.write(bytes, 0, length);
} catch (IOException ioe) {
throw new IllegalStateException("bad write", ioe);
}
bcv.setRef(rowIndex, buffer.getData(), start, length);
} else {
setNullColIsNullValue(bcv, rowIndex);
}
}
break;
case VARCHAR:
{
BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
bcv.isNull[rowIndex] = false;
HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
byte[] bytes = colHiveVarchar.getValue().getBytes();
// We assume the VARCHAR maximum length was enforced when the object was created.
int length = bytes.length;
int start = buffer.getLength();
try {
buffer.write(bytes, 0, length);
} catch (IOException ioe) {
throw new IllegalStateException("bad write", ioe);
}
bcv.setRef(rowIndex, buffer.getData(), start, length);
} else {
setNullColIsNullValue(bcv, rowIndex);
}
}
break;
case DECIMAL:
DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
dcv.isNull[rowIndex] = false;
HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
dcv.set(rowIndex, wobj);
} else {
setNullColIsNullValue(dcv, rowIndex);
}
break;
default:
throw new HiveException("Vectorization is not supported for data type: " + poi.getPrimitiveCategory());
}
}
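For context, here is a minimal standalone sketch (not part of the Hive source; the class name and main method are illustrative) of the null convention setVector relies on: a null value is recorded by setting isNull and clearing noNulls on the column vector, while a harmless filler (1 for long-backed columns, NaN for double-backed columns) is written so downstream vectorized operators never read an uninitialized slot.

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class NullFillerSketch {
  public static void main(String[] args) {
    // Two-column batch: column 0 is long-backed, column 1 is double-backed.
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    batch.cols[0] = new LongColumnVector();
    batch.cols[1] = new DoubleColumnVector();
    LongColumnVector lcv = (LongColumnVector) batch.cols[0];
    DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[1];

    int rowIndex = 0;
    // Null long value: write the filler 1 and mark the slot null.
    lcv.vector[rowIndex] = 1;
    lcv.isNull[rowIndex] = true;
    lcv.noNulls = false;
    // Null double value: write the filler NaN and mark the slot null.
    dcv.vector[rowIndex] = Double.NaN;
    dcv.isNull[rowIndex] = true;
    dcv.noNulls = false;

    batch.size = 1;
    System.out.println("long col noNulls = " + lcv.noNulls + ", double col noNulls = " + dcv.noNulls);
  }
}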
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class VectorRandomRowSource, method getObjectInspector.
public static ObjectInspector getObjectInspector(TypeInfo typeInfo, DataTypePhysicalVariation dataTypePhysicalVariation) {
final ObjectInspector objectInspector;
switch(typeInfo.getCategory()) {
case PRIMITIVE:
{
final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
if (primitiveTypeInfo instanceof DecimalTypeInfo && dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
objectInspector = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.longTypeInfo);
} else {
objectInspector = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo);
}
}
break;
case MAP:
{
final MapTypeInfo mapType = (MapTypeInfo) typeInfo;
final MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector(getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo()));
objectInspector = mapInspector;
}
break;
case LIST:
{
final ListTypeInfo listType = (ListTypeInfo) typeInfo;
final ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector(getObjectInspector(listType.getListElementTypeInfo()));
objectInspector = listInspector;
}
break;
case STRUCT:
{
final StructTypeInfo structType = (StructTypeInfo) typeInfo;
final List<TypeInfo> fieldTypes = structType.getAllStructFieldTypeInfos();
final List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
for (TypeInfo fieldType : fieldTypes) {
fieldInspectors.add(getObjectInspector(fieldType));
}
final StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector(structType.getAllStructFieldNames(), fieldInspectors);
objectInspector = structInspector;
}
break;
case UNION:
{
final UnionTypeInfo unionType = (UnionTypeInfo) typeInfo;
final List<TypeInfo> fieldTypes = unionType.getAllUnionObjectTypeInfos();
final List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
for (TypeInfo fieldType : fieldTypes) {
fieldInspectors.add(getObjectInspector(fieldType));
}
final UnionObjectInspector unionInspector = ObjectInspectorFactory.getStandardUnionObjectInspector(fieldInspectors);
objectInspector = unionInspector;
}
break;
default:
throw new RuntimeException("Unexpected category " + typeInfo.getCategory());
}
Preconditions.checkState(objectInspector != null);
return objectInspector;
}
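A hedged usage sketch of the method above: a decimal type carried with the DECIMAL_64 physical variation is inspected as a plain writable long, while any other type keeps its own writable inspector. The surrounding class and the exact printed type names are assumptions; the sketch is written as if it lived alongside VectorRandomRowSource in the Hive test tree.

import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class InspectorSketch {
  public static void main(String[] args) {
    ObjectInspector decimal64OI = VectorRandomRowSource.getObjectInspector(
        TypeInfoFactory.getDecimalTypeInfo(10, 2), DataTypePhysicalVariation.DECIMAL_64);
    ObjectInspector plainDecimalOI = VectorRandomRowSource.getObjectInspector(
        TypeInfoFactory.getDecimalTypeInfo(10, 2), DataTypePhysicalVariation.NONE);
    // Expected under the DECIMAL_64 mapping above: "bigint" vs "decimal(10,2)".
    System.out.println(decimal64OI.getTypeName() + " vs " + plainDecimalOI.getTypeName());
  }
}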
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class VectorRandomRowSource, method randomRow.
public Object[] randomRow() {
final Object[] row = new Object[columnCount];
if (generationSpecList == null) {
for (int c = 0; c < columnCount; c++) {
row[c] = randomWritable(c);
}
} else {
for (int c = 0; c < columnCount; c++) {
GenerationSpec generationSpec = generationSpecList.get(c);
GenerationSpec.GenerationKind generationKind = generationSpec.getGenerationKind();
Object object;
switch(generationKind) {
case SAME_TYPE:
object = randomWritable(c, generationSpec.getColumnAllowNulls());
break;
case OMIT_GENERATION:
object = null;
break;
case STRING_FAMILY:
{
TypeInfo typeInfo = generationSpec.getTypeInfo();
StringGenerationOption stringGenerationOption = generationSpec.getStringGenerationOption();
object = randomStringFamily(r, typeInfo, stringGenerationOption, true);
}
break;
case STRING_FAMILY_OTHER_TYPE_VALUE:
{
TypeInfo typeInfo = generationSpec.getTypeInfo();
TypeInfo otherTypeTypeInfo = generationSpec.getSourceTypeInfo();
object = randomStringFamilyOtherTypeValue(r, typeInfo, otherTypeTypeInfo, true);
}
break;
case TIMESTAMP_MILLISECONDS:
{
LongWritable longWritable = (LongWritable) randomWritable(c);
if (longWritable != null) {
while (true) {
long longValue = longWritable.get();
if (longValue >= MIN_FOUR_DIGIT_YEAR_MILLIS && longValue <= MAX_FOUR_DIGIT_YEAR_MILLIS) {
break;
}
longWritable.set((Long) VectorRandomRowSource.randomPrimitiveObject(r, TypeInfoFactory.longTypeInfo));
}
}
object = longWritable;
}
break;
case VALUE_LIST:
{
List<Object> valueList = generationSpec.getValueList();
final int valueCount = valueList.size();
TypeInfo typeInfo = generationSpec.getTypeInfo();
Category category = typeInfo.getCategory();
switch(category) {
case PRIMITIVE:
case STRUCT:
object = valueList.get(r.nextInt(valueCount));
break;
case LIST:
{
final int elementCount = r.nextInt(valueCount);
ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
final ObjectInspector elementObjectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(elementTypeInfo);
List<Object> list = new ArrayList<Object>(elementCount);
for (int i = 0; i < elementCount; i++) {
Object elementWritable = randomWritable(elementTypeInfo, elementObjectInspector, allowNull);
list.add(elementWritable);
}
object = list;
}
break;
case MAP:
{
final int elementCount = r.nextInt(valueCount);
MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
final ObjectInspector valueObjectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(valueTypeInfo);
Map<Object, Object> map = new HashMap<Object, Object>(elementCount);
for (int i = 0; i < elementCount; i++) {
Object key = valueList.get(r.nextInt(valueCount));
Object valueWritable = randomWritable(valueTypeInfo, valueObjectInspector, allowNull);
if (!map.containsKey(key)) {
map.put(key, valueWritable);
}
}
object = map;
}
break;
default:
throw new RuntimeException("Unexpected category " + category);
}
}
break;
default:
throw new RuntimeException("Unexpected generationKind " + generationKind);
}
row[c] = object;
}
}
return row;
}
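The TIMESTAMP_MILLISECONDS branch above is a rejection-sampling loop: it keeps redrawing a random long until the epoch-millisecond value lands in a window whose years have four digits. Below is a minimal standalone sketch of that idea; the class name and the bounds are illustrative, not the MIN/MAX constants from VectorRandomRowSource.

import java.util.Random;

public class FourDigitYearMillisSketch {
  // Approximate epoch milliseconds for 1000-01-01 and 9999-12-31 (illustrative values only).
  static final long MIN_MILLIS = -30610224000000L;
  static final long MAX_MILLIS = 253402300799999L;

  public static void main(String[] args) {
    Random r = new Random(1234);
    long millis;
    // Redraw until the value falls inside the four-digit-year window; each draw is cheap,
    // so the loop finishes quickly in practice.
    do {
      millis = r.nextLong();
    } while (millis < MIN_MILLIS || millis > MAX_MILLIS);
    System.out.println("accepted millis = " + millis);
  }
}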
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class TestArrowColumnarBatchSerDe, method serializeAndDeserialize.
private void serializeAndDeserialize(ArrowColumnarBatchSerDe serDe, Object[][] rows, StructObjectInspector rowOI, int[] selectedRows) {
ArrowWrapperWritable serialized = null;
for (Object[] row : rows) {
serialized = serDe.serialize(row, rowOI);
}
// When obj is null, serialized is not null -- is this expected?
// assertTrue(serialized == null);
boolean useNativeSelected = selectedRows != null && selectedRows.length > 0;
// Pass null to complete a batch
if (serialized == null) {
// Native-selected mode (triggering Serializer.writePrimitive)
if (useNativeSelected) {
serDe.serializer.vectorizedRowBatch.selectedInUse = true;
serDe.serializer.vectorizedRowBatch.size = selectedRows.length;
serDe.serializer.vectorizedRowBatch.selected = selectedRows;
// Call Native serialization directly
serialized = serDe.serializer.serializeBatch(serDe.serializer.vectorizedRowBatch, true);
} else {
// Non-native mode
serialized = serDe.serialize(null, rowOI);
}
}
final Object[][] deserializedRows = (Object[][]) serDe.deserialize(serialized);
for (int rowIndex = 0; rowIndex < Math.min(deserializedRows.length, rows.length); rowIndex++) {
// The expected row is at selectedRows[rowIndex] in native-selected mode, otherwise at rowIndex
final Object[] row = useNativeSelected ? rows[selectedRows[rowIndex]] : rows[rowIndex];
final Object[] deserializedRow = deserializedRows[rowIndex];
assertEquals(row.length, deserializedRow.length);
final List<? extends StructField> fields = rowOI.getAllStructFieldRefs();
for (int fieldIndex = 0; fieldIndex < fields.size(); fieldIndex++) {
final StructField field = fields.get(fieldIndex);
final ObjectInspector fieldObjInspector = field.getFieldObjectInspector();
switch(fieldObjInspector.getCategory()) {
case PRIMITIVE:
final PrimitiveObjectInspector primitiveObjInspector = (PrimitiveObjectInspector) fieldObjInspector;
switch(primitiveObjInspector.getPrimitiveCategory()) {
case STRING:
case VARCHAR:
case CHAR:
assertEquals(Objects.toString(row[fieldIndex]), Objects.toString(deserializedRow[fieldIndex]));
break;
default:
assertEquals(row[fieldIndex], deserializedRow[fieldIndex]);
break;
}
break;
case STRUCT:
final Object[] rowStruct = (Object[]) row[fieldIndex];
final List deserializedRowStruct = (List) deserializedRow[fieldIndex];
if (rowStruct == null) {
assertNull(deserializedRowStruct);
} else {
assertArrayEquals(rowStruct, deserializedRowStruct.toArray());
}
break;
case LIST:
case UNION:
assertEquals(row[fieldIndex], deserializedRow[fieldIndex]);
break;
case MAP:
final Map rowMap = (Map) row[fieldIndex];
final Map deserializedRowMap = (Map) deserializedRow[fieldIndex];
if (rowMap == null) {
assertNull(deserializedRowMap);
} else {
final Set rowMapKeySet = rowMap.keySet();
final Set deserializedRowMapKeySet = deserializedRowMap.keySet();
assertEquals(rowMapKeySet, deserializedRowMapKeySet);
for (Object key : rowMapKeySet) {
assertEquals(rowMap.get(key), deserializedRowMap.get(key));
}
}
break;
}
}
}
}
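To make the native-selected comparison above concrete, here is a tiny plain-Java sketch (no Hive classes; all names invented) of the index mapping: when a selected array is in use, deserialized row i is compared against rows[selectedRows[i]] rather than rows[i].

public class SelectedRowMappingSketch {
  public static void main(String[] args) {
    String[] rows = { "r0", "r1", "r2", "r3" };
    int[] selectedRows = { 2, 0 };
    boolean useNativeSelected = selectedRows.length > 0;
    for (int rowIndex = 0; rowIndex < selectedRows.length; rowIndex++) {
      // In native-selected mode the expected source row is looked up through selectedRows.
      String expected = useNativeSelected ? rows[selectedRows[rowIndex]] : rows[rowIndex];
      System.out.println("deserialized[" + rowIndex + "] is compared against " + expected);
    }
  }
}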
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class ArrowColumnarBatchSerDe, method toField.
private static Field toField(String name, TypeInfo typeInfo) {
switch(typeInfo.getCategory()) {
case PRIMITIVE:
final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
switch(primitiveTypeInfo.getPrimitiveCategory()) {
case BOOLEAN:
return Field.nullable(name, MinorType.BIT.getType());
case BYTE:
return Field.nullable(name, MinorType.TINYINT.getType());
case SHORT:
return Field.nullable(name, MinorType.SMALLINT.getType());
case INT:
return Field.nullable(name, MinorType.INT.getType());
case LONG:
return Field.nullable(name, MinorType.BIGINT.getType());
case FLOAT:
return Field.nullable(name, MinorType.FLOAT4.getType());
case DOUBLE:
return Field.nullable(name, MinorType.FLOAT8.getType());
case STRING:
case VARCHAR:
case CHAR:
return Field.nullable(name, MinorType.VARCHAR.getType());
case DATE:
return Field.nullable(name, MinorType.DATEDAY.getType());
case TIMESTAMP:
return Field.nullable(name, MinorType.TIMESTAMPMILLI.getType());
case TIMESTAMPLOCALTZ:
final TimestampLocalTZTypeInfo timestampLocalTZTypeInfo = (TimestampLocalTZTypeInfo) typeInfo;
final String timeZone = timestampLocalTZTypeInfo.getTimeZone().toString();
return Field.nullable(name, new ArrowType.Timestamp(TimeUnit.MILLISECOND, timeZone));
case BINARY:
return Field.nullable(name, MinorType.VARBINARY.getType());
case DECIMAL:
final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
final int precision = decimalTypeInfo.precision();
final int scale = decimalTypeInfo.scale();
return Field.nullable(name, new ArrowType.Decimal(precision, scale));
case INTERVAL_YEAR_MONTH:
return Field.nullable(name, MinorType.INTERVALYEAR.getType());
case INTERVAL_DAY_TIME:
return Field.nullable(name, MinorType.INTERVALDAY.getType());
default:
throw new IllegalArgumentException();
}
case LIST:
final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
final TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
return new Field(name, FieldType.nullable(MinorType.LIST.getType()), Lists.newArrayList(toField(DEFAULT_ARROW_FIELD_NAME, elementTypeInfo)));
case STRUCT:
final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
final List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
final List<Field> structFields = Lists.newArrayList();
final int structSize = fieldNames.size();
for (int i = 0; i < structSize; i++) {
structFields.add(toField(fieldNames.get(i), fieldTypeInfos.get(i)));
}
return new Field(name, FieldType.nullable(MinorType.STRUCT.getType()), structFields);
case UNION:
final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
final List<Field> unionFields = Lists.newArrayList();
final int unionSize = objectTypeInfos.size();
for (int i = 0; i < unionSize; i++) {
unionFields.add(toField(DEFAULT_ARROW_FIELD_NAME, objectTypeInfos.get(i)));
}
return new Field(name, FieldType.nullable(MinorType.UNION.getType()), unionFields);
case MAP:
final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
final TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
final TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
final List<Field> mapFields = Lists.newArrayList();
mapFields.add(toField(name + "_keys", keyTypeInfo));
mapFields.add(toField(name + "_values", valueTypeInfo));
FieldType struct = new FieldType(false, new ArrowType.Struct(), null);
List<Field> childrenOfList = Lists.newArrayList(new Field(name, struct, mapFields));
return new Field(name, FieldType.nullable(MinorType.LIST.getType()), childrenOfList);
default:
throw new IllegalArgumentException();
}
}
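As a hedged illustration of the LIST branch above, the sketch below builds the Arrow Field shape that toField produces for a Hive list<int> column: a nullable LIST field with a single nullable INT child. The child name "element" is a placeholder; the real code uses DEFAULT_ARROW_FIELD_NAME, whose value is not shown in this snippet.

import java.util.Collections;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

public class ListFieldSketch {
  public static void main(String[] args) {
    // Child element field (toField would be called recursively here).
    Field element = Field.nullable("element", MinorType.INT.getType());
    // Wrap the element in a nullable LIST field, mirroring the LIST case of toField.
    Field listField = new Field("int_list",
        FieldType.nullable(MinorType.LIST.getType()),
        Collections.singletonList(element));
    System.out.println(listField);
  }
}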