Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class VectorDeserializeRow, method allocatePrimitiveField:
private Field allocatePrimitiveField(TypeInfo sourceTypeInfo, DataTypePhysicalVariation dataTypePhysicalVariation) {
  final PrimitiveTypeInfo sourcePrimitiveTypeInfo = (PrimitiveTypeInfo) sourceTypeInfo;
  final PrimitiveCategory sourcePrimitiveCategory = sourcePrimitiveTypeInfo.getPrimitiveCategory();
  int maxLength = 0;
  VectorBatchDeserializer deserializer;
  switch (sourcePrimitiveCategory) {
    case VOID:
      deserializer = new VectorVoidDeserializer();
      break;
    case BOOLEAN:
      deserializer = new VectorBooleanDeserializer();
      break;
    case BYTE:
      deserializer = new VectorByteDeserializer();
      break;
    case SHORT:
      deserializer = new VectorShortDeserializer();
      break;
    case INT:
      deserializer = new VectorIntDeserializer();
      break;
    case LONG:
      deserializer = new VectorLongDeserializer();
      break;
    case TIMESTAMP:
      deserializer = new VectorTimestampDeserializer();
      break;
    case DATE:
      deserializer = new VectorDateDeserializer();
      break;
    case FLOAT:
      deserializer = new VectorFloatDeserializer();
      break;
    case DOUBLE:
      deserializer = new VectorDoubleDeserializer();
      break;
    case BINARY:
      deserializer = new VectorBinaryDeserializer();
      break;
    case STRING:
      deserializer = new VectorStringDeserializer();
      break;
    case VARCHAR:
      maxLength = ((VarcharTypeInfo) sourcePrimitiveTypeInfo).getLength();
      deserializer = new VectorVarcharDeserializer();
      break;
    case CHAR:
      maxLength = ((CharTypeInfo) sourcePrimitiveTypeInfo).getLength();
      deserializer = new VectorCharDeserializer();
      break;
    case DECIMAL:
      deserializer = new VectorDecimalDeserializer();
      break;
    case INTERVAL_YEAR_MONTH:
      deserializer = new VectorIntervalYearMonthDeserializer();
      break;
    case INTERVAL_DAY_TIME:
      deserializer = new VectorIntervalDayTimeDeserializer();
      break;
    default:
      throw new RuntimeException("Primitive category " + sourcePrimitiveCategory + " not supported");
  }
  return new Field(sourcePrimitiveCategory, dataTypePhysicalVariation, maxLength, deserializer);
}
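The following is an illustrative sketch, not part of the Hive snippet above: it shows where the inputs to allocatePrimitiveField typically come from. The VARCHAR/CHAR maximum length travels on the TypeInfo itself, and a decimal type may be paired with the DECIMAL_64 physical variation. It assumes only the standard TypeInfoFactory and DataTypePhysicalVariation classes from the Hive serde/common packages.

// Illustrative only: forming the (TypeInfo, DataTypePhysicalVariation) pair the method consumes.
VarcharTypeInfo varchar20 = TypeInfoFactory.getVarcharTypeInfo(20);
int declaredMax = varchar20.getLength();                   // 20, recorded as maxLength on the Field
DecimalTypeInfo dec10_2 = TypeInfoFactory.getDecimalTypeInfo(10, 2);
DataTypePhysicalVariation variation = DataTypePhysicalVariation.DECIMAL_64;  // used when values fit in a long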
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class VectorExtractRow, method extractRowColumn:
public Object extractRowColumn(ColumnVector colVector, TypeInfo typeInfo, ObjectInspector objectInspector, int batchIndex) {
  if (colVector == null) {
    // The column may not have been included for reading, but other parts of execution
    // may still ask for it; return null in that case.
    return null;
  }
  final int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
  if (!colVector.noNulls && colVector.isNull[adjustedIndex]) {
    return null;
  }
  final Category category = typeInfo.getCategory();
  switch (category) {
    case PRIMITIVE:
      {
        final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
        final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
        final Writable primitiveWritable = VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory);
        switch (primitiveCategory) {
          case VOID:
            return null;
          case BOOLEAN:
            ((BooleanWritable) primitiveWritable).set(((LongColumnVector) colVector).vector[adjustedIndex] == 0 ? false : true);
            return primitiveWritable;
          case BYTE:
            ((ByteWritable) primitiveWritable).set((byte) ((LongColumnVector) colVector).vector[adjustedIndex]);
            return primitiveWritable;
          case SHORT:
            ((ShortWritable) primitiveWritable).set((short) ((LongColumnVector) colVector).vector[adjustedIndex]);
            return primitiveWritable;
          case INT:
            ((IntWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
            return primitiveWritable;
          case LONG:
            ((LongWritable) primitiveWritable).set(((LongColumnVector) colVector).vector[adjustedIndex]);
            return primitiveWritable;
          case TIMESTAMP:
            // Convert from the java.sql.Timestamp used by vectorization to the serializable
            // org.apache.hadoop.hive.common.type.Timestamp.
            java.sql.Timestamp ts = ((TimestampColumnVector) colVector).asScratchTimestamp(adjustedIndex);
            Timestamp serializableTS = Timestamp.ofEpochMilli(ts.getTime(), ts.getNanos());
            ((TimestampWritableV2) primitiveWritable).set(serializableTS);
            return primitiveWritable;
          case DATE:
            ((DateWritableV2) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
            return primitiveWritable;
          case FLOAT:
            ((FloatWritable) primitiveWritable).set((float) ((DoubleColumnVector) colVector).vector[adjustedIndex]);
            return primitiveWritable;
          case DOUBLE:
            ((DoubleWritable) primitiveWritable).set(((DoubleColumnVector) colVector).vector[adjustedIndex]);
            return primitiveWritable;
          case BINARY:
            {
              final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
              final byte[] bytes = bytesColVector.vector[adjustedIndex];
              final int start = bytesColVector.start[adjustedIndex];
              final int length = bytesColVector.length[adjustedIndex];
              BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
              if (bytes == null || length == 0) {
                if (length > 0) {
                  nullBytesReadError(primitiveCategory, batchIndex);
                }
                bytesWritable.set(EMPTY_BYTES, 0, 0);
              } else {
                bytesWritable.set(bytes, start, length);
              }
              return primitiveWritable;
            }
          case STRING:
            {
              final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
              final byte[] bytes = bytesColVector.vector[adjustedIndex];
              final int start = bytesColVector.start[adjustedIndex];
              final int length = bytesColVector.length[adjustedIndex];
              if (bytes == null || length == 0) {
                if (length > 0) {
                  nullBytesReadError(primitiveCategory, batchIndex);
                }
                ((Text) primitiveWritable).set(EMPTY_BYTES, 0, 0);
              } else {
                // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
                ((Text) primitiveWritable).set(bytes, start, length);
              }
              return primitiveWritable;
            }
          case VARCHAR:
            {
              final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
              final byte[] bytes = bytesColVector.vector[adjustedIndex];
              final int start = bytesColVector.start[adjustedIndex];
              final int length = bytesColVector.length[adjustedIndex];
              final HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable;
              if (bytes == null || length == 0) {
                if (length > 0) {
                  nullBytesReadError(primitiveCategory, batchIndex);
                }
                hiveVarcharWritable.set(EMPTY_STRING, -1);
              } else {
                final int adjustedLength = StringExpr.truncate(bytes, start, length, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
                if (adjustedLength == 0) {
                  hiveVarcharWritable.set(EMPTY_STRING, -1);
                } else {
                  hiveVarcharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
                }
              }
              return primitiveWritable;
            }
          case CHAR:
            {
              final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
              final byte[] bytes = bytesColVector.vector[adjustedIndex];
              final int start = bytesColVector.start[adjustedIndex];
              final int length = bytesColVector.length[adjustedIndex];
              final HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable;
              final int maxLength = ((CharTypeInfo) primitiveTypeInfo).getLength();
              if (bytes == null || length == 0) {
                if (length > 0) {
                  nullBytesReadError(primitiveCategory, batchIndex);
                }
                hiveCharWritable.set(EMPTY_STRING, maxLength);
              } else {
                final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, ((CharTypeInfo) primitiveTypeInfo).getLength());
                if (adjustedLength == 0) {
                  hiveCharWritable.set(EMPTY_STRING, maxLength);
                } else {
                  hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), maxLength);
                }
              }
              return primitiveWritable;
            }
          case DECIMAL:
            if (colVector instanceof Decimal64ColumnVector) {
              Decimal64ColumnVector dec64ColVector = (Decimal64ColumnVector) colVector;
              ((HiveDecimalWritable) primitiveWritable).deserialize64(dec64ColVector.vector[adjustedIndex], dec64ColVector.scale);
            } else {
              // The HiveDecimalWritable set method will quickly copy the deserialized decimal writable fields.
              ((HiveDecimalWritable) primitiveWritable).set(((DecimalColumnVector) colVector).vector[adjustedIndex]);
            }
            return primitiveWritable;
          case INTERVAL_YEAR_MONTH:
            ((HiveIntervalYearMonthWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
            return primitiveWritable;
          case INTERVAL_DAY_TIME:
            ((HiveIntervalDayTimeWritable) primitiveWritable).set(((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedIndex));
            return primitiveWritable;
          default:
            throw new RuntimeException("Primitive category " + primitiveCategory.name() + " not supported");
        }
      }
    case LIST:
      {
        final ListColumnVector listColumnVector = (ListColumnVector) colVector;
        final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
        final ListObjectInspector listObjectInspector = (ListObjectInspector) objectInspector;
        final int offset = (int) listColumnVector.offsets[adjustedIndex];
        final int size = (int) listColumnVector.lengths[adjustedIndex];
        final List list = new ArrayList();
        for (int i = 0; i < size; i++) {
          list.add(extractRowColumn(listColumnVector.child, listTypeInfo.getListElementTypeInfo(), listObjectInspector.getListElementObjectInspector(), offset + i));
        }
        return list;
      }
    case MAP:
      {
        final MapColumnVector mapColumnVector = (MapColumnVector) colVector;
        final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
        final MapObjectInspector mapObjectInspector = (MapObjectInspector) objectInspector;
        final int offset = (int) mapColumnVector.offsets[adjustedIndex];
        final int size = (int) mapColumnVector.lengths[adjustedIndex];
        final Map<Object, Object> map = new LinkedHashMap<Object, Object>();
        for (int i = 0; i < size; i++) {
          final Object key = extractRowColumn(mapColumnVector.keys, mapTypeInfo.getMapKeyTypeInfo(), mapObjectInspector.getMapKeyObjectInspector(), offset + i);
          final Object value = extractRowColumn(mapColumnVector.values, mapTypeInfo.getMapValueTypeInfo(), mapObjectInspector.getMapValueObjectInspector(), offset + i);
          map.put(key, value);
        }
        return map;
      }
    case STRUCT:
      {
        final StructColumnVector structColumnVector = (StructColumnVector) colVector;
        final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        final StandardStructObjectInspector structInspector = (StandardStructObjectInspector) objectInspector;
        final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
        final int size = fieldTypeInfos.size();
        final List<? extends StructField> structFields = structInspector.getAllStructFieldRefs();
        final Object struct = structInspector.create();
        for (int i = 0; i < size; i++) {
          final StructField structField = structFields.get(i);
          final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i);
          final Object value = extractRowColumn(structColumnVector.fields[i], fieldTypeInfo, structField.getFieldObjectInspector(), adjustedIndex);
          structInspector.setStructFieldData(struct, structField, value);
        }
        return struct;
      }
    case UNION:
      {
        final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
        final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
        final UnionObjectInspector unionInspector = (UnionObjectInspector) objectInspector;
        final List<ObjectInspector> unionInspectors = unionInspector.getObjectInspectors();
        final UnionColumnVector unionColumnVector = (UnionColumnVector) colVector;
        final byte tag = (byte) unionColumnVector.tags[adjustedIndex];
        final Object object = extractRowColumn(unionColumnVector.fields[tag], objectTypeInfos.get(tag), unionInspectors.get(tag), adjustedIndex);
        final StandardUnion standardUnion = new StandardUnion();
        standardUnion.setTag(tag);
        standardUnion.setObject(object);
        return standardUnion;
      }
    default:
      throw new RuntimeException("Category " + category.name() + " not supported");
  }
}
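A minimal usage sketch, assuming a VectorExtractRow instance named extractRow has already been constructed and initialized for the schema (the instance name and its setup are assumptions, not shown in the snippet). It only uses the column-vector, TypeInfo, and ObjectInspector classes that appear above.

// Illustrative only; assumes extractRow is an initialized VectorExtractRow.
LongColumnVector intCol = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
intCol.vector[0] = 42;          // INT values are stored as longs in vectorized batches
intCol.noNulls = false;
intCol.isNull[1] = true;

Object v0 = extractRow.extractRowColumn(intCol, TypeInfoFactory.intTypeInfo,
    PrimitiveObjectInspectorFactory.writableIntObjectInspector, 0);   // IntWritable holding 42
Object v1 = extractRow.extractRowColumn(intCol, TypeInfoFactory.intTypeInfo,
    PrimitiveObjectInspectorFactory.writableIntObjectInspector, 1);   // null, since isNull[1] is set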
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class HBaseRowSerializer, method serialize:
private boolean serialize(Object obj, ObjectInspector objInspector, int level, ByteStream.Output ss) throws IOException {
  switch (objInspector.getCategory()) {
    case PRIMITIVE:
      LazyUtils.writePrimitiveUTF8(ss, obj, (PrimitiveObjectInspector) objInspector, escaped, escapeChar, needsEscape);
      return true;
    case LIST:
      char separator = (char) separators[level];
      ListObjectInspector loi = (ListObjectInspector) objInspector;
      List<?> list = loi.getList(obj);
      ObjectInspector eoi = loi.getListElementObjectInspector();
      if (list == null) {
        return false;
      } else {
        for (int i = 0; i < list.size(); i++) {
          if (i > 0) {
            ss.write(separator);
          }
          Object currentItem = list.get(i);
          if (currentItem != null) {
            serialize(currentItem, eoi, level + 1, ss);
          }
        }
      }
      return true;
    case MAP:
      char sep = (char) separators[level];
      char keyValueSeparator = (char) separators[level + 1];
      MapObjectInspector moi = (MapObjectInspector) objInspector;
      ObjectInspector koi = moi.getMapKeyObjectInspector();
      ObjectInspector voi = moi.getMapValueObjectInspector();
      Map<?, ?> map = moi.getMap(obj);
      if (map == null) {
        return false;
      } else {
        boolean first = true;
        for (Map.Entry<?, ?> entry : map.entrySet()) {
          if (first) {
            first = false;
          } else {
            ss.write(sep);
          }
          serialize(entry.getKey(), koi, level + 2, ss);
          Object currentValue = entry.getValue();
          if (currentValue != null) {
            ss.write(keyValueSeparator);
            serialize(currentValue, voi, level + 2, ss);
          }
        }
      }
      return true;
    case STRUCT:
      sep = (char) separators[level];
      StructObjectInspector soi = (StructObjectInspector) objInspector;
      List<? extends StructField> fields = soi.getAllStructFieldRefs();
      list = soi.getStructFieldsDataAsList(obj);
      if (list == null) {
        return false;
      } else {
        for (int i = 0; i < list.size(); i++) {
          if (i > 0) {
            ss.write(sep);
          }
          Object currentItem = list.get(i);
          if (currentItem != null) {
            serialize(currentItem, fields.get(i).getFieldObjectInspector(), level + 1, ss);
          }
        }
      }
      return true;
    case UNION:
      // union type currently not totally supported. See HIVE-2390
      return false;
    default:
      throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
  }
}
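For intuition, here is a hedged, self-contained sketch (plain Java, not Hive code) of the byte layout the MAP branch produces. The separators are hard-coded as 0x02 between entries and 0x03 between key and value purely for illustration; the real serializer reads them from separators[level] and separators[level + 1].

// Illustrative only: mimic the MAP branch with fixed separators.
ByteArrayOutputStream out = new ByteArrayOutputStream();
Map<String, String> m = new LinkedHashMap<>();
m.put("a", "1");
m.put("b", "2");
boolean first = true;
for (Map.Entry<String, String> e : m.entrySet()) {
  if (!first) {
    out.write(2);                                  // entry separator (separators[level])
  }
  first = false;
  byte[] k = e.getKey().getBytes(StandardCharsets.UTF_8);
  byte[] v = e.getValue().getBytes(StandardCharsets.UTF_8);
  out.write(k, 0, k.length);
  out.write(3);                                    // key/value separator (separators[level + 1])
  out.write(v, 0, v.length);
}
// out now holds: 'a' 0x03 '1' 0x02 'b' 0x03 '2'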
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class HBaseRowSerializer, method serializeKeyField:
byte[] serializeKeyField(Object keyValue, StructField keyField, ColumnMapping keyMapping) throws IOException {
  if (keyValue == null) {
    throw new IOException("HBase row key cannot be NULL");
  }
  ObjectInspector keyFieldOI = keyField.getFieldObjectInspector();
  if (!keyFieldOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE) && keyMapping.isCategory(ObjectInspector.Category.PRIMITIVE)) {
    // we always serialize the String type using the escaped algorithm for LazyString
    return serialize(SerDeUtils.getJSONString(keyValue, keyFieldOI), PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false);
  }
  // use the serialization option switch to write primitive values as either a variable
  // length UTF8 string or a fixed width bytes if serializing in binary format
  boolean writeBinary = keyMapping.binaryStorage.get(0);
  return serialize(keyValue, keyFieldOI, 1, writeBinary);
}
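As a hedged illustration of the JSON fallback above (illustrative code, not from the Hive repository): when the key's Hive type is not primitive but the HBase key mapping expects a primitive, SerDeUtils.getJSONString renders the value as text, and that string is then serialized like any other string key. The inspector construction uses the standard ObjectInspectorFactory and PrimitiveObjectInspectorFactory helpers.

// Illustrative only: a struct row key rendered as a JSON string before serialization.
StructObjectInspector keyOI = ObjectInspectorFactory.getStandardStructObjectInspector(
    Arrays.asList("id", "name"),
    Arrays.<ObjectInspector>asList(
        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
        PrimitiveObjectInspectorFactory.javaStringObjectInspector));
Object keyValue = Arrays.asList(1, "x");
String json = SerDeUtils.getJSONString(keyValue, keyOI);   // something like {"id":1,"name":"x"}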
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class HBaseRowSerializer, method serializeField:
private void serializeField(Object value, StructField field, ColumnMapping colMap, Put put) throws IOException {
  if (value == null) {
    // a null object, we do not serialize it
    return;
  }
  // Get the field objectInspector and the field object.
  ObjectInspector foi = field.getFieldObjectInspector();
  // If the field corresponds to a column family in HBase
  if (colMap.qualifierName == null) {
    MapObjectInspector moi = (MapObjectInspector) foi;
    Map<?, ?> map = moi.getMap(value);
    if (map == null) {
      return;
    }
    ObjectInspector koi = moi.getMapKeyObjectInspector();
    ObjectInspector voi = moi.getMapValueObjectInspector();
    for (Map.Entry<?, ?> entry : map.entrySet()) {
      // Get the Key
      // Map keys are required to be primitive and may be serialized in binary format
      byte[] columnQualifierBytes = serialize(entry.getKey(), koi, 3, colMap.binaryStorage.get(0));
      if (columnQualifierBytes == null) {
        continue;
      }
      // Map values may be serialized in binary format when they are primitive and binary
      // serialization is the option selected
      byte[] bytes = serialize(entry.getValue(), voi, 3, colMap.binaryStorage.get(1));
      if (bytes == null) {
        continue;
      }
      put.addColumn(colMap.familyNameBytes, columnQualifierBytes, bytes);
    }
  } else {
    byte[] bytes;
    // If the field is not a primitive but the column mapping declares a primitive, serialize
    // the value as a JSON string; otherwise serialize it in the delimited way.
    if (!foi.getCategory().equals(ObjectInspector.Category.PRIMITIVE) && colMap.isCategory(ObjectInspector.Category.PRIMITIVE)) {
      // we always serialize the String type using the escaped algorithm for LazyString
      bytes = serialize(SerDeUtils.getJSONString(value, foi), PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false);
    } else {
      // use the serialization option switch to write primitive values as either a variable
      // length UTF8 string or a fixed width bytes if serializing in binary format
      bytes = serialize(value, foi, 1, colMap.binaryStorage.get(0));
    }
    if (bytes == null) {
      return;
    }
    put.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes, bytes);
  }
}
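To make the two branches concrete, here is a hedged sketch (illustrative row, family, and qualifier names; not Hive code) of the cells serializeField ends up adding to the Put in each mapping mode, using the standard HBase client API.

// Column-family mapping (colMap.qualifierName == null): the Hive value is a MAP and each
// entry becomes its own qualifier under the mapped family.
Put put = new Put(Bytes.toBytes("row1"));
put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("k1"), Bytes.toBytes("v1"));
put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("k2"), Bytes.toBytes("v2"));

// Single-column mapping (colMap.qualifierName set): the whole serialized value lands in one
// cell under the fixed family:qualifier pair.
put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("col"), Bytes.toBytes("value"));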