use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
the class StatsUtils method getSizeOfComplexTypes.
/**
 * Estimates the raw data size of a value described by the given object inspector.
 *
 * <p>The estimate is computed per category:
 * <ul>
 * <li>PRIMITIVE: string/varchar/char and binary types are sized from the average column
 * length returned by {@code getAvgColLenOf}; every other primitive is sized by
 * {@code getAvgColLenOfFixedLengthTypes}.</li>
 * <li>LIST: a constant list is sized from its actual number of elements; any other list is
 * sized as list overhead plus the configured number of entries
 * ({@code HIVE_STATS_LIST_NUM_ENTRIES}) times the element size.</li>
 * <li>MAP: a constant map is sized by {@code getSizeOfMap}; any other map is sized as one
 * key plus one value plus the hash map overhead for the configured number of entries
 * ({@code HIVE_STATS_MAP_NUM_ENTRIES}).</li>
 * <li>STRUCT: a constant struct is sized by {@code getSizeOfStruct}; any other struct is
 * sized as object overhead plus one reference per field plus the size of each field.</li>
 * <li>UNION: object overhead plus one tag per alternative plus the size of each
 * alternative.</li>
 * </ul>
 *
 * @param conf configuration the entry-count and variable-length settings are read from
 * @param oi object inspector describing the type to size
 * @return estimated raw data size; 0 when the category is none of the above
 */
public static long getSizeOfComplexTypes(HiveConf conf, ObjectInspector oi) {
  long result = 0;
  int listEntries = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_LIST_NUM_ENTRIES);
  int mapEntries = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAP_NUM_ENTRIES);
  switch (oi.getCategory()) {
    case PRIMITIVE:
      // Locale.ROOT keeps the lower-casing independent of the JVM default locale, so the
      // comparison against the serdeConstants type names cannot be broken by locale-specific
      // case mappings (e.g. the Turkish dotless i).
      String colTypeLowerCase = oi.getTypeName().toLowerCase(java.util.Locale.ROOT);
      if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
          || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
          || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
        int avgColLen = (int) getAvgColLenOf(conf, oi, colTypeLowerCase);
        result += JavaDataModel.get().lengthForStringOfLength(avgColLen);
      } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
        int avgColLen = (int) getAvgColLenOf(conf, oi, colTypeLowerCase);
        result += JavaDataModel.get().lengthForByteArrayOfSize(avgColLen);
      } else {
        result += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
      }
      break;
    case LIST:
      if (oi instanceof StandardConstantListObjectInspector) {
        // constant list projection of known length
        StandardConstantListObjectInspector scloi = (StandardConstantListObjectInspector) oi;
        List<?> value = scloi.getWritableConstantValue();
        // a null constant is treated as an empty list
        int length = (null == value) ? 0 : value.size();
        // check if list elements are primitive or Objects
        ObjectInspector leoi = scloi.getListElementObjectInspector();
        if (leoi.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
          int maxVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH);
          result += getSizeOfPrimitiveTypeArraysFromType(leoi.getTypeName(), length, maxVarLen);
        } else {
          result += JavaDataModel.get().lengthForObjectArrayOfSize(length);
        }
      } else {
        StandardListObjectInspector sloi = (StandardListObjectInspector) oi;
        // list overhead + (configured number of element in list * size of element)
        long elemSize = getSizeOfComplexTypes(conf, sloi.getListElementObjectInspector());
        result += JavaDataModel.get().arrayList() + (listEntries * elemSize);
      }
      break;
    case MAP:
      if (oi instanceof StandardConstantMapObjectInspector) {
        // constant map projection of known length
        StandardConstantMapObjectInspector scmoi = (StandardConstantMapObjectInspector) oi;
        result += getSizeOfMap(scmoi);
      } else {
        StandardMapObjectInspector smoi = (StandardMapObjectInspector) oi;
        result += getSizeOfComplexTypes(conf, smoi.getMapKeyObjectInspector());
        result += getSizeOfComplexTypes(conf, smoi.getMapValueObjectInspector());
        // hash map overhead
        result += JavaDataModel.get().hashMap(mapEntries);
      }
      break;
    case STRUCT:
      if (oi instanceof StandardConstantStructObjectInspector) {
        // constant struct projection of known contents
        StandardConstantStructObjectInspector scsoi = (StandardConstantStructObjectInspector) oi;
        result += getSizeOfStruct(scsoi);
      } else {
        StructObjectInspector soi = (StructObjectInspector) oi;
        // add constant object overhead for struct
        result += JavaDataModel.get().object();
        // add constant struct field names references overhead
        result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref();
        for (StructField field : soi.getAllStructFieldRefs()) {
          result += getSizeOfComplexTypes(conf, field.getFieldObjectInspector());
        }
      }
      break;
    case UNION:
      UnionObjectInspector uoi = (UnionObjectInspector) oi;
      // add constant object overhead for union
      result += JavaDataModel.get().object();
      // add constant size for unions tags
      result += uoi.getObjectInspectors().size() * JavaDataModel.get().primitive1();
      for (ObjectInspector foi : uoi.getObjectInspectors()) {
        result += getSizeOfComplexTypes(conf, foi);
      }
      break;
    default:
      break;
  }
  return result;
}
use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
the class GenericUDFTrunc method initializeDate.
/**
 * Validates the two arguments of the date form of trunc() and prepares the converters used
 * to read them.
 *
 * <p>The first argument must be a primitive of category STRING, VARCHAR, CHAR, VOID,
 * TIMESTAMP or DATE; the string-like and VOID categories are all recorded as STRING. The
 * second argument must belong to the string or void primitive grouping. When the second
 * argument is a constant its value is captured in {@code fmtInput}; otherwise a text
 * converter is created for it.
 *
 * @param arguments object inspectors of the call's arguments
 * @return the writable string object inspector describing the result
 * @throws UDFArgumentLengthException if the number of arguments is not 2
 * @throws UDFArgumentTypeException if either argument has an unsupported type
 */
private ObjectInspector initializeDate(ObjectInspector[] arguments) throws UDFArgumentLengthException, UDFArgumentTypeException {
  if (arguments.length != 2) {
    throw new UDFArgumentLengthException("trunc() requires 2 argument, got " + arguments.length);
  }

  final ObjectInspector valueOI = arguments[0];
  final ObjectInspector formatOI = arguments[1];

  if (valueOI.getCategory() != ObjectInspector.Category.PRIMITIVE) {
    throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " + valueOI.getTypeName() + " is passed. as first arguments");
  }
  if (formatOI.getCategory() != ObjectInspector.Category.PRIMITIVE) {
    throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but " + formatOI.getTypeName() + " is passed. as second arguments");
  }

  // First argument: pick the converter that matches its primitive category.
  inputType1 = ((PrimitiveObjectInspector) valueOI).getPrimitiveCategory();
  switch (inputType1) {
    case STRING:
    case VARCHAR:
    case CHAR:
    case VOID:
      // every string-like (and void) input is handled as STRING from here on
      inputType1 = PrimitiveCategory.STRING;
      textConverter1 = ObjectInspectorConverters.getConverter(valueOI, PrimitiveObjectInspectorFactory.writableStringObjectInspector);
      break;
    case TIMESTAMP:
      timestampConverter = new TimestampConverter((PrimitiveObjectInspector) valueOI, PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
      break;
    case DATE:
      dateWritableConverter = ObjectInspectorConverters.getConverter(valueOI, PrimitiveObjectInspectorFactory.writableDateObjectInspector);
      break;
    default:
      throw new UDFArgumentTypeException(0, "TRUNC() only takes STRING/TIMESTAMP/DATEWRITABLE types as first argument, got " + inputType1);
  }

  // Second argument: must be in the string or void grouping.
  inputType2 = ((PrimitiveObjectInspector) formatOI).getPrimitiveCategory();
  final PrimitiveGrouping formatGrouping = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(inputType2);
  final boolean formatIsStringLike = formatGrouping == PrimitiveGrouping.STRING_GROUP || formatGrouping == PrimitiveGrouping.VOID_GROUP;
  if (!formatIsStringLike) {
    throw new UDFArgumentTypeException(1, "trunc() only takes STRING/CHAR/VARCHAR types as second argument, got " + inputType2);
  }
  inputType2 = PrimitiveCategory.STRING;

  if (formatOI instanceof ConstantObjectInspector) {
    // constant format: read it once now instead of converting it on every row
    final Object constantFormat = ((ConstantObjectInspector) formatOI).getWritableConstantValue();
    if (constantFormat == null) {
      fmtInput = null;
    } else {
      fmtInput = constantFormat.toString();
    }
  } else {
    textConverter2 = ObjectInspectorConverters.getConverter(formatOI, PrimitiveObjectInspectorFactory.writableStringObjectInspector);
  }

  return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
the class GenericUDFToVarchar method initialize.
/**
 * Prepares the VARCHAR cast: checks the single argument and builds the converter from the
 * argument's inspector to the output varchar inspector.
 *
 * @param arguments object inspectors of the call's arguments; exactly one is expected
 * @return the settable varchar object inspector obtained for this UDF's {@code typeInfo}
 * @throws UDFArgumentException if the number of arguments is not 1, or the argument is not
 *         described by a {@code PrimitiveObjectInspector}
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  if (arguments.length != 1) {
    throw new UDFArgumentException("VARCHAR cast requires a value argument");
  }
  // Check the type explicitly rather than catching ClassCastException as control flow.
  // A null inspector is rejected here too instead of being stored.
  if (!(arguments[0] instanceof PrimitiveObjectInspector)) {
    throw new UDFArgumentException("The function VARCHAR takes only primitive types");
  }
  argumentOI = (PrimitiveObjectInspector) arguments[0];
  // The output inspector is derived from the typeInfo field of this UDF.
  SettableHiveVarcharObjectInspector outputOI =
      (SettableHiveVarcharObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
  converter = new HiveVarcharConverter(argumentOI, outputOI);
  return outputOI;
}
use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
the class GenericUDFToChar method initialize.
/**
 * Prepares the CHAR cast: checks the single argument and builds the converter from the
 * argument's inspector to the output char inspector.
 *
 * @param arguments object inspectors of the call's arguments; exactly one is expected
 * @return the settable char object inspector obtained for this UDF's {@code typeInfo}
 * @throws UDFArgumentException if the number of arguments is not 1, or the argument is not
 *         described by a {@code PrimitiveObjectInspector}
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  if (arguments.length != 1) {
    throw new UDFArgumentException("CHAR cast requires a value argument");
  }
  // Check the type explicitly rather than catching ClassCastException as control flow.
  // A null inspector is rejected here too instead of being stored.
  if (!(arguments[0] instanceof PrimitiveObjectInspector)) {
    throw new UDFArgumentException("The function CHAR takes only primitive types");
  }
  argumentOI = (PrimitiveObjectInspector) arguments[0];
  // The output inspector is derived from the typeInfo field of this UDF.
  SettableHiveCharObjectInspector outputOI =
      (SettableHiveCharObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
  converter = new HiveCharConverter(argumentOI, outputOI);
  return outputOI;
}
use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
the class VerifyFastRow method serializeWrite.
/**
 * Writes {@code object} to {@code serializeWrite}, driven by the shape of {@code typeInfo}.
 *
 * <p>A null object is written with {@code writeNull()}. Primitives are cast to the writable
 * class matching their primitive category and written with the corresponding
 * {@code writeXxx} call. Lists, maps, structs and unions are written between the matching
 * begin/finish calls, with this method invoked recursively for each element, key, value,
 * field or union member.
 *
 * @param serializeWrite destination writer
 * @param typeInfo type describing {@code object}
 * @param object value to write; may be null
 * @throws IOException declared for failures raised by the writer calls
 */
public static void serializeWrite(SerializeWrite serializeWrite, TypeInfo typeInfo, Object object) throws IOException {
  if (object == null) {
    serializeWrite.writeNull();
    return;
  }
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      {
        PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
        switch (primitiveTypeInfo.getPrimitiveCategory()) {
          case BOOLEAN:
            {
              boolean value = ((BooleanWritable) object).get();
              serializeWrite.writeBoolean(value);
            }
            break;
          case BYTE:
            {
              byte value = ((ByteWritable) object).get();
              serializeWrite.writeByte(value);
            }
            break;
          case SHORT:
            {
              short value = ((ShortWritable) object).get();
              serializeWrite.writeShort(value);
            }
            break;
          case INT:
            {
              int value = ((IntWritable) object).get();
              serializeWrite.writeInt(value);
            }
            break;
          case LONG:
            {
              long value = ((LongWritable) object).get();
              serializeWrite.writeLong(value);
            }
            break;
          case FLOAT:
            {
              float value = ((FloatWritable) object).get();
              serializeWrite.writeFloat(value);
            }
            break;
          case DOUBLE:
            {
              double value = ((DoubleWritable) object).get();
              serializeWrite.writeDouble(value);
            }
            break;
          case STRING:
            {
              Text value = (Text) object;
              byte[] stringBytes = value.getBytes();
              // getBytes() exposes the backing array, which can be longer than the valid
              // content; getLength() is the number of valid bytes (same approach as the
              // BINARY case below).
              int stringLength = value.getLength();
              serializeWrite.writeString(stringBytes, 0, stringLength);
            }
            break;
          case CHAR:
            {
              HiveChar value = ((HiveCharWritable) object).getHiveChar();
              serializeWrite.writeHiveChar(value);
            }
            break;
          case VARCHAR:
            {
              HiveVarchar value = ((HiveVarcharWritable) object).getHiveVarchar();
              serializeWrite.writeHiveVarchar(value);
            }
            break;
          case DECIMAL:
            {
              HiveDecimal value = ((HiveDecimalWritable) object).getHiveDecimal();
              DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
              serializeWrite.writeHiveDecimal(value, decTypeInfo.scale());
            }
            break;
          case DATE:
            {
              Date value = ((DateWritableV2) object).get();
              serializeWrite.writeDate(value);
            }
            break;
          case TIMESTAMP:
            {
              Timestamp value = ((TimestampWritableV2) object).getTimestamp();
              serializeWrite.writeTimestamp(value);
            }
            break;
          case INTERVAL_YEAR_MONTH:
            {
              HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth();
              serializeWrite.writeHiveIntervalYearMonth(value);
            }
            break;
          case INTERVAL_DAY_TIME:
            {
              HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime();
              serializeWrite.writeHiveIntervalDayTime(value);
            }
            break;
          case BINARY:
            {
              BytesWritable byteWritable = (BytesWritable) object;
              byte[] binaryBytes = byteWritable.getBytes();
              int length = byteWritable.getLength();
              serializeWrite.writeBinary(binaryBytes, 0, length);
            }
            break;
          default:
            throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory().name());
        }
      }
      break;
    case LIST:
      {
        ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
        TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
        // Cast to the List interface so any list implementation is accepted.
        List<Object> elements = (List<Object>) object;
        serializeWrite.beginList(elements);
        boolean isFirst = true;
        for (Object elementObject : elements) {
          if (isFirst) {
            isFirst = false;
          } else {
            serializeWrite.separateList();
          }
          // a null element is written as null by the recursive call's first check
          serializeWrite(serializeWrite, elementTypeInfo, elementObject);
        }
        serializeWrite.finishList();
      }
      break;
    case MAP:
      {
        MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
        TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
        TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
        Map<Object, Object> hashMap = (Map<Object, Object>) object;
        serializeWrite.beginMap(hashMap);
        boolean isFirst = true;
        for (Map.Entry<Object, Object> entry : hashMap.entrySet()) {
          if (isFirst) {
            isFirst = false;
          } else {
            serializeWrite.separateKeyValuePair();
          }
          // null keys and values are written as null by the recursive call's first check
          serializeWrite(serializeWrite, keyTypeInfo, entry.getKey());
          serializeWrite.separateKey();
          serializeWrite(serializeWrite, valueTypeInfo, entry.getValue());
        }
        serializeWrite.finishMap();
      }
      break;
    case STRUCT:
      {
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
        List<Object> fieldValues = (List<Object>) object;
        final int size = fieldValues.size();
        serializeWrite.beginStruct(fieldValues);
        boolean isFirst = true;
        for (int i = 0; i < size; i++) {
          if (isFirst) {
            isFirst = false;
          } else {
            serializeWrite.separateStruct();
          }
          serializeWrite(serializeWrite, fieldTypeInfos.get(i), fieldValues.get(i));
        }
        serializeWrite.finishStruct();
      }
      break;
    case UNION:
      {
        UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
        List<TypeInfo> fieldTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
        StandardUnionObjectInspector.StandardUnion standardUnion = (StandardUnionObjectInspector.StandardUnion) object;
        // the tag selects which alternative's type describes the wrapped object
        byte tag = standardUnion.getTag();
        serializeWrite.beginUnion(tag);
        serializeWrite(serializeWrite, fieldTypeInfos.get(tag), standardUnion.getObject());
        serializeWrite.finishUnion();
      }
      break;
    default:
      throw new Error("Unknown category " + typeInfo.getCategory().name());
  }
}
Aggregations