use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector in project hive by apache.
the class ObjectInspectorUtils method hashCode.
public static int hashCode(Object o, ObjectInspector objIns) {
if (o == null) {
return 0;
}
switch(objIns.getCategory()) {
case PRIMITIVE:
{
PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) objIns);
switch(poi.getPrimitiveCategory()) {
case VOID:
return 0;
case BOOLEAN:
return ((BooleanObjectInspector) poi).get(o) ? 1 : 0;
case BYTE:
return ((ByteObjectInspector) poi).get(o);
case SHORT:
return ((ShortObjectInspector) poi).get(o);
case INT:
return ((IntObjectInspector) poi).get(o);
case LONG:
{
long a = ((LongObjectInspector) poi).get(o);
return (int) ((a >>> 32) ^ a);
}
case FLOAT:
return Float.floatToIntBits(((FloatObjectInspector) poi).get(o));
case DOUBLE:
{
// This hash function returns the same result as Double.hashCode()
// while DoubleWritable.hashCode returns a different result.
long a = Double.doubleToLongBits(((DoubleObjectInspector) poi).get(o));
return (int) ((a >>> 32) ^ a);
}
case STRING:
{
// This hash function returns the same result as String.hashCode() when
// all characters are ASCII, while Text.hashCode() always returns a
// different result.
Text t = ((StringObjectInspector) poi).getPrimitiveWritableObject(o);
int r = 0;
for (int i = 0; i < t.getLength(); i++) {
r = r * 31 + t.getBytes()[i];
}
return r;
}
case CHAR:
return ((HiveCharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
case VARCHAR:
return ((HiveVarcharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
case BINARY:
return ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
case DATE:
return ((DateObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
case TIMESTAMP:
TimestampWritable t = ((TimestampObjectInspector) poi).getPrimitiveWritableObject(o);
return t.hashCode();
case INTERVAL_YEAR_MONTH:
HiveIntervalYearMonthWritable intervalYearMonth = ((HiveIntervalYearMonthObjectInspector) poi).getPrimitiveWritableObject(o);
return intervalYearMonth.hashCode();
case INTERVAL_DAY_TIME:
HiveIntervalDayTimeWritable intervalDayTime = ((HiveIntervalDayTimeObjectInspector) poi).getPrimitiveWritableObject(o);
return intervalDayTime.hashCode();
case DECIMAL:
// compatible hash code.
return ((HiveDecimalObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
default:
{
throw new RuntimeException("Unknown type: " + poi.getPrimitiveCategory());
}
}
}
case LIST:
{
int r = 0;
ListObjectInspector listOI = (ListObjectInspector) objIns;
ObjectInspector elemOI = listOI.getListElementObjectInspector();
for (int ii = 0; ii < listOI.getListLength(o); ++ii) {
r = 31 * r + hashCode(listOI.getListElement(o, ii), elemOI);
}
return r;
}
case MAP:
{
int r = 0;
MapObjectInspector mapOI = (MapObjectInspector) objIns;
ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
Map<?, ?> map = mapOI.getMap(o);
for (Map.Entry<?, ?> entry : map.entrySet()) {
r += hashCode(entry.getKey(), keyOI) ^ hashCode(entry.getValue(), valueOI);
}
return r;
}
case STRUCT:
int r = 0;
StructObjectInspector structOI = (StructObjectInspector) objIns;
List<? extends StructField> fields = structOI.getAllStructFieldRefs();
for (StructField field : fields) {
r = 31 * r + hashCode(structOI.getStructFieldData(o, field), field.getFieldObjectInspector());
}
return r;
case UNION:
UnionObjectInspector uOI = (UnionObjectInspector) objIns;
byte tag = uOI.getTag(o);
return hashCode(uOI.getField(o), uOI.getObjectInspectors().get(tag));
default:
throw new RuntimeException("Unknown type: " + objIns.getTypeName());
}
}
use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector in project hive by apache.
the class BinarySortableSerDe method serialize.
static void serialize(ByteStream.Output buffer, Object o, ObjectInspector oi, boolean invert, byte nullMarker, byte notNullMarker) throws SerDeException {
// Is this field a null?
if (o == null) {
writeByte(buffer, nullMarker, invert);
return;
}
// This field is not a null.
writeByte(buffer, notNullMarker, invert);
switch(oi.getCategory()) {
case PRIMITIVE:
{
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
switch(poi.getPrimitiveCategory()) {
case VOID:
{
return;
}
case BOOLEAN:
{
boolean v = ((BooleanObjectInspector) poi).get(o);
writeByte(buffer, (byte) (v ? 2 : 1), invert);
return;
}
case BYTE:
{
ByteObjectInspector boi = (ByteObjectInspector) poi;
byte v = boi.get(o);
writeByte(buffer, (byte) (v ^ 0x80), invert);
return;
}
case SHORT:
{
ShortObjectInspector spoi = (ShortObjectInspector) poi;
short v = spoi.get(o);
serializeShort(buffer, v, invert);
return;
}
case INT:
{
IntObjectInspector ioi = (IntObjectInspector) poi;
int v = ioi.get(o);
serializeInt(buffer, v, invert);
return;
}
case LONG:
{
LongObjectInspector loi = (LongObjectInspector) poi;
long v = loi.get(o);
serializeLong(buffer, v, invert);
return;
}
case FLOAT:
{
FloatObjectInspector foi = (FloatObjectInspector) poi;
serializeFloat(buffer, foi.get(o), invert);
return;
}
case DOUBLE:
{
DoubleObjectInspector doi = (DoubleObjectInspector) poi;
serializeDouble(buffer, doi.get(o), invert);
return;
}
case STRING:
{
StringObjectInspector soi = (StringObjectInspector) poi;
Text t = soi.getPrimitiveWritableObject(o);
serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
return;
}
case CHAR:
{
HiveCharObjectInspector hcoi = (HiveCharObjectInspector) poi;
HiveCharWritable hc = hcoi.getPrimitiveWritableObject(o);
// Trailing space should ignored for char comparisons.
// So write stripped values for this SerDe.
Text t = hc.getStrippedValue();
serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
return;
}
case VARCHAR:
{
HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi;
HiveVarcharWritable hc = hcoi.getPrimitiveWritableObject(o);
// use varchar's text field directly
Text t = hc.getTextValue();
serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
return;
}
case BINARY:
{
BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
BytesWritable ba = baoi.getPrimitiveWritableObject(o);
byte[] toSer = new byte[ba.getLength()];
System.arraycopy(ba.getBytes(), 0, toSer, 0, ba.getLength());
serializeBytes(buffer, toSer, ba.getLength(), invert);
return;
}
case DATE:
{
DateObjectInspector doi = (DateObjectInspector) poi;
int v = doi.getPrimitiveWritableObject(o).getDays();
serializeInt(buffer, v, invert);
return;
}
case TIMESTAMP:
{
TimestampObjectInspector toi = (TimestampObjectInspector) poi;
TimestampWritable t = toi.getPrimitiveWritableObject(o);
serializeTimestampWritable(buffer, t, invert);
return;
}
case INTERVAL_YEAR_MONTH:
{
HiveIntervalYearMonthObjectInspector ioi = (HiveIntervalYearMonthObjectInspector) poi;
HiveIntervalYearMonth intervalYearMonth = ioi.getPrimitiveJavaObject(o);
serializeHiveIntervalYearMonth(buffer, intervalYearMonth, invert);
return;
}
case INTERVAL_DAY_TIME:
{
HiveIntervalDayTimeObjectInspector ioi = (HiveIntervalDayTimeObjectInspector) poi;
HiveIntervalDayTime intervalDayTime = ioi.getPrimitiveJavaObject(o);
serializeHiveIntervalDayTime(buffer, intervalDayTime, invert);
return;
}
case DECIMAL:
{
HiveDecimalObjectInspector boi = (HiveDecimalObjectInspector) poi;
HiveDecimal dec = boi.getPrimitiveJavaObject(o);
serializeHiveDecimal(buffer, dec, invert);
return;
}
default:
{
throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
}
}
}
case LIST:
{
ListObjectInspector loi = (ListObjectInspector) oi;
ObjectInspector eoi = loi.getListElementObjectInspector();
// \1 followed by each element
int size = loi.getListLength(o);
for (int eid = 0; eid < size; eid++) {
writeByte(buffer, (byte) 1, invert);
serialize(buffer, loi.getListElement(o, eid), eoi, invert, nullMarker, notNullMarker);
}
// and \0 to terminate
writeByte(buffer, (byte) 0, invert);
return;
}
case MAP:
{
MapObjectInspector moi = (MapObjectInspector) oi;
ObjectInspector koi = moi.getMapKeyObjectInspector();
ObjectInspector voi = moi.getMapValueObjectInspector();
// \1 followed by each key and then each value
Map<?, ?> map = moi.getMap(o);
for (Map.Entry<?, ?> entry : map.entrySet()) {
writeByte(buffer, (byte) 1, invert);
serialize(buffer, entry.getKey(), koi, invert, nullMarker, notNullMarker);
serialize(buffer, entry.getValue(), voi, invert, nullMarker, notNullMarker);
}
// and \0 to terminate
writeByte(buffer, (byte) 0, invert);
return;
}
case STRUCT:
{
StructObjectInspector soi = (StructObjectInspector) oi;
List<? extends StructField> fields = soi.getAllStructFieldRefs();
for (int i = 0; i < fields.size(); i++) {
serialize(buffer, soi.getStructFieldData(o, fields.get(i)), fields.get(i).getFieldObjectInspector(), invert, nullMarker, notNullMarker);
}
return;
}
case UNION:
{
UnionObjectInspector uoi = (UnionObjectInspector) oi;
byte tag = uoi.getTag(o);
writeByte(buffer, tag, invert);
serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), invert, nullMarker, notNullMarker);
return;
}
default:
{
throw new RuntimeException("Unrecognized type: " + oi.getCategory());
}
}
}
use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector in project hive by apache.
the class DataWritableWriter method createWriter.
/**
* Creates a writer for the specific object inspector. The returned writer will be used
* to call Parquet API for the specific data type.
* @param inspector The object inspector used to get the correct value type.
* @param type Type that contains information about the type schema.
* @return A ParquetWriter object used to call the Parquet API fo the specific data type.
*/
private DataWriter createWriter(ObjectInspector inspector, Type type) {
if (type.isPrimitive()) {
checkInspectorCategory(inspector, ObjectInspector.Category.PRIMITIVE);
PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) inspector;
switch(primitiveInspector.getPrimitiveCategory()) {
case BOOLEAN:
return new BooleanDataWriter((BooleanObjectInspector) inspector);
case BYTE:
return new ByteDataWriter((ByteObjectInspector) inspector);
case SHORT:
return new ShortDataWriter((ShortObjectInspector) inspector);
case INT:
return new IntDataWriter((IntObjectInspector) inspector);
case LONG:
return new LongDataWriter((LongObjectInspector) inspector);
case FLOAT:
return new FloatDataWriter((FloatObjectInspector) inspector);
case DOUBLE:
return new DoubleDataWriter((DoubleObjectInspector) inspector);
case STRING:
return new StringDataWriter((StringObjectInspector) inspector);
case CHAR:
return new CharDataWriter((HiveCharObjectInspector) inspector);
case VARCHAR:
return new VarcharDataWriter((HiveVarcharObjectInspector) inspector);
case BINARY:
return new BinaryDataWriter((BinaryObjectInspector) inspector);
case TIMESTAMP:
return new TimestampDataWriter((TimestampObjectInspector) inspector);
case DECIMAL:
return new DecimalDataWriter((HiveDecimalObjectInspector) inspector);
case DATE:
return new DateDataWriter((DateObjectInspector) inspector);
default:
throw new IllegalArgumentException("Unsupported primitive data type: " + primitiveInspector.getPrimitiveCategory());
}
} else {
GroupType groupType = type.asGroupType();
OriginalType originalType = type.getOriginalType();
if (originalType != null && originalType.equals(OriginalType.LIST)) {
checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
return new ListDataWriter((ListObjectInspector) inspector, groupType);
} else if (originalType != null && originalType.equals(OriginalType.MAP)) {
checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
return new MapDataWriter((MapObjectInspector) inspector, groupType);
} else {
checkInspectorCategory(inspector, ObjectInspector.Category.STRUCT);
return new StructDataWriter((StructObjectInspector) inspector, groupType);
}
}
}
use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector in project hive by apache.
the class StatsUtils method getAvgColLenOf.
/**
* Get the raw data size of variable length data types
* @param conf
* - hive conf
* @param oi
* - object inspector
* @param colType
* - column type
* @return raw data size
*/
public static long getAvgColLenOf(HiveConf conf, ObjectInspector oi, String colType) {
long configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH);
String colTypeLowCase = colType.toLowerCase();
if (colTypeLowCase.equals(serdeConstants.STRING_TYPE_NAME)) {
// constant string projection Ex: select "hello" from table
if (oi instanceof ConstantObjectInspector) {
ConstantObjectInspector coi = (ConstantObjectInspector) oi;
// if writable constant is null then return size 0
Object constantValue = coi.getWritableConstantValue();
return constantValue == null ? 0 : constantValue.toString().length();
} else if (oi instanceof StringObjectInspector) {
// return the variable length from config
return configVarLen;
}
} else if (colTypeLowCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
// constant varchar projection
if (oi instanceof ConstantObjectInspector) {
ConstantObjectInspector coi = (ConstantObjectInspector) oi;
// if writable constant is null then return size 0
Object constantValue = coi.getWritableConstantValue();
return constantValue == null ? 0 : constantValue.toString().length();
} else if (oi instanceof HiveVarcharObjectInspector) {
VarcharTypeInfo type = (VarcharTypeInfo) ((HiveVarcharObjectInspector) oi).getTypeInfo();
return type.getLength();
}
} else if (colTypeLowCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
// constant char projection
if (oi instanceof ConstantObjectInspector) {
ConstantObjectInspector coi = (ConstantObjectInspector) oi;
// if writable constant is null then return size 0
Object constantValue = coi.getWritableConstantValue();
return constantValue == null ? 0 : constantValue.toString().length();
} else if (oi instanceof HiveCharObjectInspector) {
CharTypeInfo type = (CharTypeInfo) ((HiveCharObjectInspector) oi).getTypeInfo();
return type.getLength();
}
} else if (colTypeLowCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
// constant byte arrays
if (oi instanceof ConstantObjectInspector) {
ConstantObjectInspector coi = (ConstantObjectInspector) oi;
// if writable constant is null then return size 0
BytesWritable constantValue = (BytesWritable) coi.getWritableConstantValue();
return constantValue == null ? 0 : constantValue.getLength();
} else if (oi instanceof BinaryObjectInspector) {
// return the variable length from config
return configVarLen;
}
} else {
// complex types (map, list, struct, union)
return getSizeOfComplexTypes(conf, oi);
}
throw new IllegalArgumentException("Size requested for unknown type: " + colType + " OI: " + oi.getTypeName());
}
use of org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector in project presto by prestodb.
the class SerDeUtils method serializePrimitive.
private static void serializePrimitive(Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector) {
requireNonNull(builder, "parent builder is null");
if (object == null) {
builder.appendNull();
return;
}
switch(inspector.getPrimitiveCategory()) {
case BOOLEAN:
BooleanType.BOOLEAN.writeBoolean(builder, ((BooleanObjectInspector) inspector).get(object));
return;
case BYTE:
TinyintType.TINYINT.writeLong(builder, ((ByteObjectInspector) inspector).get(object));
return;
case SHORT:
SmallintType.SMALLINT.writeLong(builder, ((ShortObjectInspector) inspector).get(object));
return;
case INT:
IntegerType.INTEGER.writeLong(builder, ((IntObjectInspector) inspector).get(object));
return;
case LONG:
BigintType.BIGINT.writeLong(builder, ((LongObjectInspector) inspector).get(object));
return;
case FLOAT:
RealType.REAL.writeLong(builder, floatToRawIntBits(((FloatObjectInspector) inspector).get(object)));
return;
case DOUBLE:
DoubleType.DOUBLE.writeDouble(builder, ((DoubleObjectInspector) inspector).get(object));
return;
case STRING:
type.writeSlice(builder, Slices.utf8Slice(((StringObjectInspector) inspector).getPrimitiveJavaObject(object)));
return;
case VARCHAR:
type.writeSlice(builder, Slices.utf8Slice(((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(object).getValue()));
return;
case CHAR:
CharType charType = (CharType) type;
HiveChar hiveChar = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(object);
type.writeSlice(builder, trimSpacesAndTruncateToLength(Slices.utf8Slice(hiveChar.getValue()), charType.getLength()));
return;
case DATE:
DateType.DATE.writeLong(builder, formatDateAsLong(object, (DateObjectInspector) inspector));
return;
case TIMESTAMP:
TimestampType.TIMESTAMP.writeLong(builder, formatTimestampAsLong(object, (TimestampObjectInspector) inspector));
return;
case BINARY:
VARBINARY.writeSlice(builder, Slices.wrappedBuffer(((BinaryObjectInspector) inspector).getPrimitiveJavaObject(object)));
return;
case DECIMAL:
DecimalType decimalType = (DecimalType) type;
HiveDecimalWritable hiveDecimal = ((HiveDecimalObjectInspector) inspector).getPrimitiveWritableObject(object);
if (decimalType.isShort()) {
decimalType.writeLong(builder, DecimalUtils.getShortDecimalValue(hiveDecimal, decimalType.getScale()));
} else {
decimalType.writeSlice(builder, DecimalUtils.getLongDecimalValue(hiveDecimal, decimalType.getScale()));
}
return;
}
throw new RuntimeException("Unknown primitive type: " + inspector.getPrimitiveCategory());
}
Aggregations