Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.
The class SerdeRandomRowSource, method getWritableObject.
public Object getWritableObject(int column, Object object) {
  ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
  PrimitiveCategory primitiveCategory = primitiveCategories[column];
  PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
  switch (primitiveCategory) {
    case BOOLEAN:
      return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
    case BYTE:
      return ((WritableByteObjectInspector) objectInspector).create((byte) object);
    case SHORT:
      return ((WritableShortObjectInspector) objectInspector).create((short) object);
    case INT:
      return ((WritableIntObjectInspector) objectInspector).create((int) object);
    case LONG:
      return ((WritableLongObjectInspector) objectInspector).create((long) object);
    case DATE:
      return ((WritableDateObjectInspector) objectInspector).create((Date) object);
    case FLOAT:
      return ((WritableFloatObjectInspector) objectInspector).create((float) object);
    case DOUBLE:
      return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
    case STRING:
      return ((WritableStringObjectInspector) objectInspector).create((String) object);
    case CHAR: {
      WritableHiveCharObjectInspector writableCharObjectInspector =
          new WritableHiveCharObjectInspector((CharTypeInfo) primitiveTypeInfo);
      return writableCharObjectInspector.create((HiveChar) object);
    }
    case VARCHAR: {
      WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
          new WritableHiveVarcharObjectInspector((VarcharTypeInfo) primitiveTypeInfo);
      return writableVarcharObjectInspector.create((HiveVarchar) object);
    }
    case BINARY:
      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create((byte[]) object);
    case TIMESTAMP:
      return ((WritableTimestampObjectInspector) objectInspector).create((Timestamp) object);
    case INTERVAL_YEAR_MONTH:
      return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create((HiveIntervalYearMonth) object);
    case INTERVAL_DAY_TIME:
      return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create((HiveIntervalDayTime) object);
    case DECIMAL: {
      WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
          new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
      return writableDecimalObjectInspector.create((HiveDecimal) object);
    }
    default:
      throw new Error("Unknown primitive category " + primitiveCategory);
  }
}
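For context, a minimal standalone sketch (not taken from SerdeRandomRowSource; the class name is illustrative) of the pattern the switch above relies on: a writable ObjectInspector from PrimitiveObjectInspectorFactory wraps a plain Java value in its Hadoop Writable counterpart.

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;

public class WritableCreateSketch {
  public static void main(String[] args) {
    // create() on a writable ObjectInspector returns the Writable wrapper for the value.
    Object writable = PrimitiveObjectInspectorFactory.writableIntObjectInspector.create(42);
    System.out.println(writable instanceof IntWritable); // true
    System.out.println(writable);                        // 42
  }
}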
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project presto by prestodb.
The class HiveBucketing, method getJavaObjectInspector.
private static ObjectInspector getJavaObjectInspector(ObjectInspector objectInspector) {
  checkArgument(objectInspector.getCategory() == Category.PRIMITIVE, "Unsupported object inspector category %s", objectInspector.getCategory());
  PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) objectInspector);
  switch (poi.getPrimitiveCategory()) {
    case BOOLEAN:
      return javaBooleanObjectInspector;
    case BYTE:
      return javaByteObjectInspector;
    case SHORT:
      return javaShortObjectInspector;
    case INT:
      return javaIntObjectInspector;
    case LONG:
      return javaLongObjectInspector;
    case STRING:
      return javaStringObjectInspector;
  }
  throw new RuntimeException("Unsupported type: " + poi.getPrimitiveCategory());
}
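A hedged sketch of how such a Java (non-writable) ObjectInspector can also be obtained directly from a Hive TypeInfo rather than through the switch above; this snippet is illustrative and is not Presto code.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class JavaObjectInspectorSketch {
  public static void main(String[] args) {
    // Look up the Java ObjectInspector for the int type.
    ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.intTypeInfo);
    System.out.println(oi.getCategory()); // PRIMITIVE
    System.out.println(oi.getTypeName()); // int
  }
}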
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.
The class VectorizedListColumnReader, method readBatch.
@Override
public void readBatch(int total, ColumnVector column, TypeInfo columnType) throws IOException {
  ListColumnVector lcv = (ListColumnVector) column;
  // Before reading the batch, initialize offsets & lengths to the default size;
  // the actual size is assigned in setChildrenInfo() after reading completes.
  lcv.offsets = new long[VectorizedRowBatch.DEFAULT_SIZE];
  lcv.lengths = new long[VectorizedRowBatch.DEFAULT_SIZE];
  // Because the length of ListColumnVector.child can't be known yet,
  // valueList temporarily holds all data for the ListColumnVector.
  List<Object> valueList = new ArrayList<>();
  PrimitiveObjectInspector.PrimitiveCategory category = ((PrimitiveTypeInfo) ((ListTypeInfo) columnType).getListElementTypeInfo()).getPrimitiveCategory();
  // Read the first row in the Parquet data page; this happens only once per instance.
  if (isFirstRow) {
    if (!fetchNextValue(category)) {
      return;
    }
    isFirstRow = false;
  }
  int index = 0;
  while (!eof && index < total) {
    // Add elements to the ListColumnVector one by one.
    addElement(lcv, valueList, category, index);
    index++;
  }
  // Decode the values if the current page is dictionary encoded.
  if (isCurrentPageDictionaryEncoded) {
    valueList = decodeDictionaryIds(category, valueList);
  }
  // Convert valueList to an array for ListColumnVector.child.
  convertValueListToListColumnVector(category, lcv, valueList, index);
}
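To make the offsets/lengths bookkeeping above concrete, here is a small hand-built example (independent of the Parquet reader) of how a ListColumnVector represents the two rows [1, 2] and [3]: offsets and lengths index into the shared child vector that convertValueListToListColumnVector ultimately fills.

import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class ListColumnVectorSketch {
  public static void main(String[] args) {
    LongColumnVector child = new LongColumnVector();
    child.vector[0] = 1;
    child.vector[1] = 2;
    child.vector[2] = 3;
    ListColumnVector lcv = new ListColumnVector(2, child);
    lcv.offsets[0] = 0; lcv.lengths[0] = 2; // row 0 -> elements 0..1 -> [1, 2]
    lcv.offsets[1] = 2; lcv.lengths[1] = 1; // row 1 -> element 2     -> [3]
    lcv.childCount = 3;
  }
}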
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.
The class VectorizedParquetRecordReader, method buildVectorizedParquetReader.
// Build a VectorizedParquetColumnReader from the Hive typeInfo and the Parquet schema
private VectorizedColumnReader buildVectorizedParquetReader(TypeInfo typeInfo, Type type, PageReadStore pages, List<ColumnDescriptor> columnDescriptors, boolean skipTimestampConversion, int depth) throws IOException {
  List<ColumnDescriptor> descriptors = getAllColumnDescriptorByType(depth, type, columnDescriptors);
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      if (columnDescriptors == null || columnDescriptors.isEmpty()) {
        throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
      }
      if (fileSchema.getColumns().contains(descriptors.get(0))) {
        return new VectorizedPrimitiveColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, type, typeInfo);
      } else {
        // Support for schema evolution
        return new VectorizedDummyColumnReader();
      }
    case STRUCT:
      StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
      List<VectorizedColumnReader> fieldReaders = new ArrayList<>();
      List<TypeInfo> fieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
      List<Type> types = type.asGroupType().getFields();
      for (int i = 0; i < fieldTypes.size(); i++) {
        VectorizedColumnReader r = buildVectorizedParquetReader(fieldTypes.get(i), types.get(i), pages, descriptors, skipTimestampConversion, depth + 1);
        if (r != null) {
          fieldReaders.add(r);
        } else {
          throw new RuntimeException("Fail to build Parquet vectorized reader based on Hive type " + fieldTypes.get(i).getTypeName() + " and Parquet type" + types.get(i).toString());
        }
      }
      return new VectorizedStructColumnReader(fieldReaders);
    case LIST:
      checkListColumnSupport(((ListTypeInfo) typeInfo).getListElementTypeInfo());
      if (columnDescriptors == null || columnDescriptors.isEmpty()) {
        throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
      }
      return new VectorizedListColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, getElementType(type), typeInfo);
    case MAP:
      if (columnDescriptors == null || columnDescriptors.isEmpty()) {
        throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
      }
      // Handle the different Map definitions in Parquet, e.g.:
      // definition with 1 group:
      //   repeated group map (MAP_KEY_VALUE)
      //     {required binary key (UTF8); optional binary value (UTF8);}
      // definition with 2 groups:
      //   optional group m1 (MAP) {
      //     repeated group map (MAP_KEY_VALUE)
      //       {required binary key (UTF8); optional binary value (UTF8);}
      //   }
      int nestGroup = 0;
      GroupType groupType = type.asGroupType();
      // Descend into nested groups until the key/value group is reached,
      // but no deeper than MAP_DEFINITION_LEVEL_MAX.
      while (groupType.getFieldCount() < 2) {
        if (nestGroup > MAP_DEFINITION_LEVEL_MAX) {
          throw new RuntimeException("More than " + MAP_DEFINITION_LEVEL_MAX + " level is found in Map definition, " + "Failed to get the field types for Map with type " + type);
        }
        groupType = groupType.getFields().get(0).asGroupType();
        nestGroup++;
      }
      List<Type> kvTypes = groupType.getFields();
      VectorizedListColumnReader keyListColumnReader = new VectorizedListColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, kvTypes.get(0), typeInfo);
      VectorizedListColumnReader valueListColumnReader = new VectorizedListColumnReader(descriptors.get(1), pages.getPageReader(descriptors.get(1)), skipTimestampConversion, kvTypes.get(1), typeInfo);
      return new VectorizedMapColumnReader(keyListColumnReader, valueListColumnReader);
    case UNION:
    default:
      throw new RuntimeException("Unsupported category " + typeInfo.getCategory().name());
  }
}
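The recursion above is driven purely by ObjectInspector.Category on the Hive side. Below is a hypothetical, self-contained sketch of that traversal pattern over a Hive TypeInfo tree (no Parquet involved; the walk() method and the type string are made up for illustration).

import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeInfoWalkSketch {
  // Recurse by Category, descending into STRUCT/LIST/MAP children,
  // the same shape of dispatch the reader builder uses.
  static void walk(TypeInfo t, int depth) {
    switch (t.getCategory()) {
      case PRIMITIVE:
        System.out.println("depth " + depth + ": primitive " + t.getTypeName());
        break;
      case STRUCT:
        for (TypeInfo f : ((StructTypeInfo) t).getAllStructFieldTypeInfos()) {
          walk(f, depth + 1);
        }
        break;
      case LIST:
        walk(((ListTypeInfo) t).getListElementTypeInfo(), depth + 1);
        break;
      case MAP:
        walk(((MapTypeInfo) t).getMapKeyTypeInfo(), depth + 1);
        walk(((MapTypeInfo) t).getMapValueTypeInfo(), depth + 1);
        break;
      default:
        throw new RuntimeException("Unsupported category " + t.getCategory());
    }
  }

  public static void main(String[] args) {
    walk(TypeInfoUtils.getTypeInfoFromTypeString("struct<a:int,b:array<string>,c:map<string,bigint>>"), 0);
  }
}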
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.
The class PushdownTuple, method getConstantAsBytes.
/**
 * @return byte[] value from the writable.
 * @throws SerDeException if the comparison type is unsupported.
 */
public byte[] getConstantAsBytes(Writable writable) throws SerDeException {
  if (pCompare instanceof StringCompare) {
    return writable.toString().getBytes();
  } else if (pCompare instanceof DoubleCompare) {
    byte[] bts = new byte[8];
    double val = ((DoubleWritable) writable).get();
    ByteBuffer.wrap(bts).putDouble(val);
    return bts;
  } else if (pCompare instanceof IntCompare) {
    byte[] bts = new byte[4];
    int val = ((IntWritable) writable).get();
    ByteBuffer.wrap(bts).putInt(val);
    return bts;
  } else if (pCompare instanceof LongCompare) {
    byte[] bts = new byte[8];
    long val = ((LongWritable) writable).get();
    ByteBuffer.wrap(bts).putLong(val);
    return bts;
  } else {
    throw new SerDeException("Unsupported primitive category: " + pCompare.getClass().getName());
  }
}
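As a standalone illustration of the encoding used above (nothing beyond the standard library is assumed), ByteBuffer writes the value in big-endian order and the same wrapping reads it back:

import java.nio.ByteBuffer;

public class ByteBufferEncodingSketch {
  public static void main(String[] args) {
    byte[] bts = new byte[8];
    ByteBuffer.wrap(bts).putLong(42L);                // big-endian by default
    long roundTripped = ByteBuffer.wrap(bts).getLong();
    System.out.println(roundTripped);                 // 42
  }
}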