Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorExtractRow, method extractRowColumn:
public Object extractRowColumn(ColumnVector colVector, TypeInfo typeInfo, ObjectInspector objectInspector, int batchIndex) {
if (colVector == null) {
// The column was not included in the batch, but other parts of execution may still ask for it, so return null.
return null;
}
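// A repeating vector keeps its single shared value (and null flag) at index 0, so map every batchIndex to that slot.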
final int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
if (!colVector.noNulls && colVector.isNull[adjustedIndex]) {
return null;
}
final Category category = typeInfo.getCategory();
switch(category) {
case PRIMITIVE:
{
final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
final Writable primitiveWritable = VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory);
switch(primitiveCategory) {
case VOID:
return null;
case BOOLEAN:
((BooleanWritable) primitiveWritable).set(((LongColumnVector) colVector).vector[adjustedIndex] == 0 ? false : true);
return primitiveWritable;
case BYTE:
((ByteWritable) primitiveWritable).set((byte) ((LongColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case SHORT:
((ShortWritable) primitiveWritable).set((short) ((LongColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case INT:
((IntWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case LONG:
((LongWritable) primitiveWritable).set(((LongColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case TIMESTAMP:
((TimestampWritable) primitiveWritable).set(((TimestampColumnVector) colVector).asScratchTimestamp(adjustedIndex));
return primitiveWritable;
case DATE:
((DateWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case FLOAT:
((FloatWritable) primitiveWritable).set((float) ((DoubleColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case DOUBLE:
((DoubleWritable) primitiveWritable).set(((DoubleColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case BINARY:
{
final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
if (bytesColVector.isRepeating) {
if (!bytesColVector.isNull[0] && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
} else {
if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
}
BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
bytesWritable.set(bytes, start, length);
return primitiveWritable;
}
case STRING:
{
final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
if (bytesColVector.isRepeating) {
if (!bytesColVector.isNull[0] && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
} else {
if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
}
// Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
((Text) primitiveWritable).set(bytes, start, length);
return primitiveWritable;
}
case VARCHAR:
{
final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
if (bytesColVector.isRepeating) {
if (!bytesColVector.isNull[0] && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
} else {
if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
}
final int adjustedLength = StringExpr.truncate(bytes, start, length, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
final HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable;
hiveVarcharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
return primitiveWritable;
}
case CHAR:
{
final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
if (bytesColVector.isRepeating) {
if (!bytesColVector.isNull[0] && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
} else {
if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
}
final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, ((CharTypeInfo) primitiveTypeInfo).getLength());
final HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable;
hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), ((CharTypeInfo) primitiveTypeInfo).getLength());
return primitiveWritable;
}
case DECIMAL:
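// Decimal64ColumnVector stores decimals as scaled longs; deserialize64 reapplies the column's scale.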
if (colVector instanceof Decimal64ColumnVector) {
Decimal64ColumnVector dec64ColVector = (Decimal64ColumnVector) colVector;
((HiveDecimalWritable) primitiveWritable).deserialize64(dec64ColVector.vector[adjustedIndex], dec64ColVector.scale);
} else {
// The HiveDecimalWritable set method will quickly copy the deserialized decimal writable fields.
((HiveDecimalWritable) primitiveWritable).set(((DecimalColumnVector) colVector).vector[adjustedIndex]);
}
return primitiveWritable;
case INTERVAL_YEAR_MONTH:
((HiveIntervalYearMonthWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case INTERVAL_DAY_TIME:
((HiveIntervalDayTimeWritable) primitiveWritable).set(((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedIndex));
return primitiveWritable;
default:
throw new RuntimeException("Primitive category " + primitiveCategory.name() + " not supported");
}
}
case LIST:
{
final ListColumnVector listColumnVector = (ListColumnVector) colVector;
final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
final ListObjectInspector listObjectInspector = (ListObjectInspector) objectInspector;
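// offsets[adjustedIndex] and lengths[adjustedIndex] delimit this row's slice of the shared child vector.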
final int offset = (int) listColumnVector.offsets[adjustedIndex];
final int size = (int) listColumnVector.lengths[adjustedIndex];
final List list = new ArrayList();
for (int i = 0; i < size; i++) {
list.add(extractRowColumn(listColumnVector.child, listTypeInfo.getListElementTypeInfo(), listObjectInspector.getListElementObjectInspector(), offset + i));
}
return list;
}
case MAP:
{
final MapColumnVector mapColumnVector = (MapColumnVector) colVector;
final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
final MapObjectInspector mapObjectInspector = (MapObjectInspector) objectInspector;
final int offset = (int) mapColumnVector.offsets[adjustedIndex];
final int size = (int) mapColumnVector.lengths[adjustedIndex];
final Map map = new HashMap();
for (int i = 0; i < size; i++) {
final Object key = extractRowColumn(mapColumnVector.keys, mapTypeInfo.getMapKeyTypeInfo(), mapObjectInspector.getMapKeyObjectInspector(), offset + i);
final Object value = extractRowColumn(mapColumnVector.values, mapTypeInfo.getMapValueTypeInfo(), mapObjectInspector.getMapValueObjectInspector(), offset + i);
map.put(key, value);
}
return map;
}
case STRUCT:
{
final StructColumnVector structColumnVector = (StructColumnVector) colVector;
final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
final StandardStructObjectInspector structInspector = (StandardStructObjectInspector) objectInspector;
final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
final int size = fieldTypeInfos.size();
final List<? extends StructField> structFields = structInspector.getAllStructFieldRefs();
final Object struct = structInspector.create();
for (int i = 0; i < size; i++) {
final StructField structField = structFields.get(i);
final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i);
final Object value = extractRowColumn(structColumnVector.fields[i], fieldTypeInfo, structField.getFieldObjectInspector(), adjustedIndex);
structInspector.setStructFieldData(struct, structField, value);
}
return struct;
}
case UNION:
{
final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
final UnionObjectInspector unionInspector = (UnionObjectInspector) objectInspector;
final List<ObjectInspector> unionInspectors = unionInspector.getObjectInspectors();
final UnionColumnVector unionColumnVector = (UnionColumnVector) colVector;
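// The tag recorded for this row selects which of the union's field vectors holds the value.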
final byte tag = (byte) unionColumnVector.tags[adjustedIndex];
final Object object = extractRowColumn(unionColumnVector.fields[tag], objectTypeInfos.get(tag), unionInspectors.get(tag), adjustedIndex);
final StandardUnion standardUnion = new StandardUnion();
standardUnion.setTag(tag);
standardUnion.setObject(object);
return standardUnion;
}
default:
throw new RuntimeException("Category " + category.name() + " not supported");
}
}
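As a usage illustration (not part of the Hive source above), the sketch below drives extractRowColumn over one INT column of a VectorizedRowBatch. The helper name readIntColumn and its wrapping are hypothetical; the batch fields (cols, size, selected, selectedInUse) and the TypeInfoFactory/PrimitiveObjectInspectorFactory singletons are the standard Hive ones.
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Hypothetical helper: collect the values of one INT column from a batch.
List<Object> readIntColumn(VectorExtractRow extractRow, VectorizedRowBatch batch, int columnNum) {
    List<Object> values = new ArrayList<>();
    for (int i = 0; i < batch.size; i++) {
        // Honor the selected-rows indirection when the batch uses it.
        final int batchIndex = batch.selectedInUse ? batch.selected[i] : i;
        values.add(extractRow.extractRowColumn(
            batch.cols[columnNum],
            TypeInfoFactory.intTypeInfo,
            PrimitiveObjectInspectorFactory.writableIntObjectInspector,
            batchIndex));
    }
    return values;
}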
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorHashKeyWrapper, method stringifyKeys:
/*
* This method is mainly intended for debug display purposes.
*/
public String stringifyKeys(VectorColumnSetInfo columnSetInfo) {
StringBuilder sb = new StringBuilder();
boolean isFirstKey = true;
if (longValues.length > 0) {
isFirstKey = false;
sb.append("longs ");
boolean isFirstValue = true;
for (int i = 0; i < columnSetInfo.longIndices.length; i++) {
if (isFirstValue) {
isFirstValue = false;
} else {
sb.append(", ");
}
int keyIndex = columnSetInfo.longIndices[i];
if (isNull[keyIndex]) {
sb.append("null");
} else {
sb.append(longValues[i]);
PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) columnSetInfo.typeInfos[keyIndex];
// FUTURE: Add INTERVAL_YEAR_MONTH, etc, as desired.
switch(primitiveTypeInfo.getPrimitiveCategory()) {
case DATE:
{
Date dt = new Date(0);
dt.setTime(DateWritable.daysToMillis((int) longValues[i]));
sb.append(" date ");
sb.append(dt.toString());
}
break;
default:
// Add nothing more.
break;
}
}
}
}
if (doubleValues.length > 0) {
if (isFirstKey) {
isFirstKey = false;
} else {
sb.append(", ");
}
sb.append("doubles ");
boolean isFirstValue = true;
for (int i = 0; i < columnSetInfo.doubleIndices.length; i++) {
if (isFirstValue) {
isFirstValue = false;
} else {
sb.append(", ");
}
int keyIndex = columnSetInfo.doubleIndices[i];
if (isNull[keyIndex]) {
sb.append("null");
} else {
sb.append(doubleValues[i]);
}
}
}
if (byteValues.length > 0) {
if (isFirstKey) {
isFirstKey = false;
} else {
sb.append(", ");
}
sb.append("byte lengths ");
boolean isFirstValue = true;
for (int i = 0; i < columnSetInfo.stringIndices.length; i++) {
if (isFirstValue) {
isFirstValue = false;
} else {
sb.append(", ");
}
int keyIndex = columnSetInfo.stringIndices[i];
if (isNull[keyIndex]) {
sb.append("null");
} else {
sb.append(byteLengths[i]);
}
}
}
if (decimalValues.length > 0) {
if (isFirstKey) {
isFirstKey = false;
} else {
sb.append(", ");
}
sb.append("decimals ");
boolean isFirstValue = true;
for (int i = 0; i < columnSetInfo.decimalIndices.length; i++) {
if (isFirstValue) {
isFirstValue = false;
} else {
sb.append(", ");
}
int keyIndex = columnSetInfo.decimalIndices[i];
if (isNull[keyIndex]) {
sb.append("null");
} else {
sb.append(decimalValues[i]);
}
}
}
if (timestampValues.length > 0) {
if (isFirstKey) {
isFirstKey = false;
} else {
sb.append(", ");
}
sb.append("timestamps ");
boolean isFirstValue = true;
for (int i = 0; i < columnSetInfo.timestampIndices.length; i++) {
if (isFirstValue) {
isFirstValue = false;
} else {
sb.append(", ");
}
int keyIndex = columnSetInfo.timestampIndices[i];
if (isNull[keyIndex]) {
sb.append("null");
} else {
sb.append(timestampValues[i]);
}
}
}
if (intervalDayTimeValues.length > 0) {
if (isFirstKey) {
isFirstKey = false;
} else {
sb.append(", ");
}
sb.append("interval day times ");
boolean isFirstValue = true;
for (int i = 0; i < columnSetInfo.intervalDayTimeIndices.length; i++) {
if (isFirstValue) {
isFirstValue = false;
} else {
sb.append(", ");
}
int keyIndex = columnSetInfo.intervalDayTimeIndices[i];
if (isNull[keyIndex]) {
sb.append("null");
} else {
sb.append(intervalDayTimeValues[i]);
}
}
}
return sb.toString();
}
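The DATE branch above turns an epoch-day key into a printable java.sql.Date via DateWritable.daysToMillis. A minimal standalone sketch of that same conversion (the method name formatDateKey is invented for illustration):
import java.sql.Date;
import org.apache.hadoop.hive.serde2.io.DateWritable;

// Convert an epoch-day value, as stored in a LongColumnVector key, to a yyyy-MM-dd string.
String formatDateKey(long epochDays) {
    Date dt = new Date(0);
    dt.setTime(DateWritable.daysToMillis((int) epochDays));
    return dt.toString();
}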
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorizationContext, method getGenericUDFForCast:
private GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException {
UDF udfClass = null;
GenericUDF genericUdf = null;
switch(((PrimitiveTypeInfo) castType).getPrimitiveCategory()) {
case BYTE:
udfClass = new UDFToByte();
break;
case SHORT:
udfClass = new UDFToShort();
break;
case INT:
udfClass = new UDFToInteger();
break;
case LONG:
udfClass = new UDFToLong();
break;
case FLOAT:
udfClass = new UDFToFloat();
break;
case DOUBLE:
udfClass = new UDFToDouble();
break;
case STRING:
udfClass = new UDFToString();
break;
case CHAR:
genericUdf = new GenericUDFToChar();
break;
case VARCHAR:
genericUdf = new GenericUDFToVarchar();
break;
case BOOLEAN:
udfClass = new UDFToBoolean();
break;
case DATE:
genericUdf = new GenericUDFToDate();
break;
case TIMESTAMP:
genericUdf = new GenericUDFTimestamp();
break;
case INTERVAL_YEAR_MONTH:
genericUdf = new GenericUDFToIntervalYearMonth();
break;
case INTERVAL_DAY_TIME:
genericUdf = new GenericUDFToIntervalDayTime();
break;
case BINARY:
genericUdf = new GenericUDFToBinary();
break;
case DECIMAL:
genericUdf = new GenericUDFToDecimal();
break;
case VOID:
case UNKNOWN:
// Fall through to throw an exception; execution is not expected to reach here.
break;
}
if (genericUdf == null) {
if (udfClass == null) {
throw new HiveException("Could not add implicit cast for type " + castType.getTypeName());
}
genericUdf = new GenericUDFBridge();
((GenericUDFBridge) genericUdf).setUdfClassName(udfClass.getClass().getName());
}
if (genericUdf instanceof SettableUDF) {
((SettableUDF) genericUdf).setTypeInfo(castType);
}
return genericUdf;
}
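When only a legacy UDF class is available, the method falls back to wrapping it in a GenericUDFBridge. A short, hedged sketch of that wrapping step in isolation, using UDFToInteger purely as an example class:
import org.apache.hadoop.hive.ql.udf.UDFToInteger;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;

// Adapt a legacy UDF so it can be used wherever a GenericUDF is expected,
// mirroring the fall-back at the end of getGenericUDFForCast.
GenericUDF wrapLegacyUdf() {
    UDFToInteger udf = new UDFToInteger();
    GenericUDFBridge bridge = new GenericUDFBridge();
    bridge.setUdfClassName(udf.getClass().getName());
    return bridge;
}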
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorizationContext, method getStructInExpression:
private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr, TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
VectorExpression expr = null;
StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;
ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
final int fieldCount = fieldTypeInfos.size();
ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
for (int f = 0; f < fieldCount; f++) {
TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
// Only primitive fields are supported for now.
if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
return null;
}
// We are going to serialize using the 4 basic types.
ColumnVector.Type fieldVectorColumnType = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
fieldVectorColumnTypes[f] = fieldVectorColumnType;
// We currently evaluate the IN (..) constants in special ways.
PrimitiveCategory fieldPrimitiveCategory = ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
InConstantType inConstantType = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory);
fieldInConstantTypes[f] = inConstantType;
}
Output buffer = new Output();
BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(fieldCount);
final int inChildrenCount = inChildren.size();
byte[][] serializedInChildren = new byte[inChildrenCount][];
try {
for (int i = 0; i < inChildrenCount; i++) {
final ExprNodeDesc node = inChildren.get(i);
final Object[] constants;
if (node instanceof ExprNodeConstantDesc) {
ExprNodeConstantDesc constNode = (ExprNodeConstantDesc) node;
ConstantObjectInspector output = constNode.getWritableObjectInspector();
constants = ((List<?>) output.getWritableConstantValue()).toArray();
} else {
ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) node;
ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprNode);
ObjectInspector output = evaluator.initialize(exprNode.getWritableObjectInspector());
constants = (Object[]) evaluator.evaluate(null);
}
binarySortableSerializeWrite.set(buffer);
for (int f = 0; f < fieldCount; f++) {
Object constant = constants[f];
if (constant == null) {
binarySortableSerializeWrite.writeNull();
} else {
InConstantType inConstantType = fieldInConstantTypes[f];
switch(inConstantType) {
case STRING_FAMILY:
{
byte[] bytes;
if (constant instanceof Text) {
Text text = (Text) constant;
bytes = text.getBytes();
binarySortableSerializeWrite.writeString(bytes, 0, text.getLength());
} else {
throw new HiveException("Unexpected constant String type " + constant.getClass().getSimpleName());
}
}
break;
case INT_FAMILY:
{
long value;
if (constant instanceof IntWritable) {
value = ((IntWritable) constant).get();
} else if (constant instanceof LongWritable) {
value = ((LongWritable) constant).get();
} else {
throw new HiveException("Unexpected constant Long type " + constant.getClass().getSimpleName());
}
binarySortableSerializeWrite.writeLong(value);
}
break;
case FLOAT_FAMILY:
{
double value;
if (constant instanceof DoubleWritable) {
value = ((DoubleWritable) constant).get();
} else {
throw new HiveException("Unexpected constant Double type " + constant.getClass().getSimpleName());
}
binarySortableSerializeWrite.writeDouble(value);
}
break;
// UNDONE...
case DATE:
case TIMESTAMP:
case DECIMAL:
default:
throw new RuntimeException("Unexpected IN constant type " + inConstantType.name());
}
}
}
serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
}
} catch (Exception e) {
throw new HiveException(e);
}
// Create a single child representing the scratch column where we will
// generate the serialized keys of the batch.
int scratchBytesCol = ocm.allocateOutputColumn(TypeInfoFactory.stringTypeInfo);
Class<?> cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStructColumnInList.class : StructColumnInList.class);
expr = createVectorExpression(cl, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
((IStringInExpr) expr).setInListValues(serializedInChildren);
((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol);
((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(), fieldVectorColumnTypes);
return expr;
}
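The IN-list constants above are compared through their BinarySortable encoding. Below is a minimal sketch of that serialization pattern on its own, assuming a two-field (long, string) struct key; the method name serializeStructKey is invented for illustration.
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;

// Serialize one (long, string) struct value the same way the IN-list constants are
// serialized above, then snapshot the buffer bytes for later equality comparison.
byte[] serializeStructKey(long longField, String stringField) throws Exception {
    Output buffer = new Output();
    BinarySortableSerializeWrite writer = new BinarySortableSerializeWrite(2);
    writer.set(buffer);
    writer.writeLong(longField);
    byte[] utf8 = stringField.getBytes(StandardCharsets.UTF_8);
    writer.writeString(utf8, 0, utf8.length);
    return Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
}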
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorizationContext, method castConstantToLong:
private Long castConstantToLong(Object scalar, TypeInfo type, PrimitiveCategory integerPrimitiveCategory) throws HiveException {
if (null == scalar) {
return null;
}
PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
String typename = type.getTypeName();
switch(ptinfo.getPrimitiveCategory()) {
case FLOAT:
case DOUBLE:
case BYTE:
case SHORT:
case INT:
case LONG:
return ((Number) scalar).longValue();
case DECIMAL:
HiveDecimal decimalVal = (HiveDecimal) scalar;
switch(integerPrimitiveCategory) {
case BYTE:
if (!decimalVal.isByte()) {
// Accurate byte value cannot be obtained.
return null;
}
break;
case SHORT:
if (!decimalVal.isShort()) {
// Accurate short value cannot be obtained.
return null;
}
break;
case INT:
if (!decimalVal.isInt()) {
// Accurate int value cannot be obtained.
return null;
}
break;
case LONG:
if (!decimalVal.isLong()) {
// Accurate long value cannot be obtained.
return null;
}
break;
default:
throw new RuntimeException("Unexpected integer primitive type " + integerPrimitiveCategory);
}
// We only store longs in our LongColumnVector.
return decimalVal.longValue();
default:
throw new HiveException("Unsupported type " + typename + " for cast to Long");
}
}
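The DECIMAL branch guards against lossy narrowing with HiveDecimal's isByte/isShort/isInt/isLong checks before taking longValue(). A small standalone sketch of the same pattern (the helper name decimalToIntegral and the byte-width encoding are invented for illustration):
import org.apache.hadoop.hive.common.type.HiveDecimal;

// Return the decimal as a long only when it fits the requested integer width in bytes;
// otherwise return null, mirroring the DECIMAL branch of castConstantToLong.
Long decimalToIntegral(HiveDecimal decimalVal, int byteWidth) {
    switch (byteWidth) {
        case 1: if (!decimalVal.isByte())  { return null; } break;
        case 2: if (!decimalVal.isShort()) { return null; } break;
        case 4: if (!decimalVal.isInt())   { return null; } break;
        case 8: if (!decimalVal.isLong())  { return null; } break;
        default: throw new IllegalArgumentException("Unexpected width " + byteWidth);
    }
    return decimalVal.longValue();
}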