Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project hive by apache.
The class ColumnStatsTask, method unpackPrimitiveObject.
/**
 * Feeds a single primitive field of a column-statistics row into {@code statsObj}.
 *
 * <p>The {@code "columntype"} field decides which typed statistics container gets attached to
 * {@code statsObj}. Every other field is passed on to the unpack routine that corresponds to
 * the container currently attached. A {@code null} value is ignored, and a column type that is
 * not one of the names tested below leaves {@code statsObj} unchanged.
 *
 * @param oi inspector used to read {@code o}
 * @param o the field value; the call is a no-op when this is {@code null}
 * @param fieldName name of the field being unpacked
 * @param statsObj statistics object that receives the unpacked value
 */
private void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) {
  if (o == null) {
    return;
  }

  if (!fieldName.equals("columntype")) {
    // Regular statistic: route it by the kind of container attached to statsObj.
    ColumnStatisticsData attached = statsObj.getStatsData();
    if (attached.isSetBooleanStats()) {
      unpackBooleanStats(oi, o, fieldName, statsObj);
    } else if (attached.isSetLongStats()) {
      unpackLongStats(oi, o, fieldName, statsObj);
    } else if (attached.isSetDoubleStats()) {
      unpackDoubleStats(oi, o, fieldName, statsObj);
    } else if (attached.isSetStringStats()) {
      unpackStringStats(oi, o, fieldName, statsObj);
    } else if (attached.isSetBinaryStats()) {
      unpackBinaryStats(oi, o, fieldName, statsObj);
    } else if (attached.isSetDecimalStats()) {
      unpackDecimalStats(oi, o, fieldName, statsObj);
    } else if (attached.isSetDateStats()) {
      unpackDateStats(oi, o, fieldName, statsObj);
    }
    return;
  }

  // "columntype": read the type name and attach an empty container of that type.
  PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) oi;
  String typeName = ((StringObjectInspector) primitiveInspector).getPrimitiveJavaObject(o);
  ColumnStatisticsData container = new ColumnStatisticsData();
  if (typeName.equalsIgnoreCase("long")) {
    container.setLongStats(new LongColumnStatsData());
  } else if (typeName.equalsIgnoreCase("double")) {
    container.setDoubleStats(new DoubleColumnStatsData());
  } else if (typeName.equalsIgnoreCase("string")) {
    container.setStringStats(new StringColumnStatsData());
  } else if (typeName.equalsIgnoreCase("boolean")) {
    container.setBooleanStats(new BooleanColumnStatsData());
  } else if (typeName.equalsIgnoreCase("binary")) {
    container.setBinaryStats(new BinaryColumnStatsData());
  } else if (typeName.equalsIgnoreCase("decimal")) {
    container.setDecimalStats(new DecimalColumnStatsData());
  } else if (typeName.equalsIgnoreCase("date")) {
    container.setDateStats(new DateColumnStatsData());
  } else {
    // Unrecognised type name: nothing is attached to statsObj.
    return;
  }
  statsObj.setStatsData(container);
}
Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project hive by apache.
The class ColumnStatsTask, method unpackDecimalStats.
/**
 * Copies one decimal-column statistic from a row field into the decimal statistics held by
 * {@code statsObj}. Field names other than the ones handled below are ignored.
 *
 * @param oi inspector used to read {@code o}; it is cast to the inspector type the field needs
 * @param o the field value
 * @param fName name of the statistic carried by this field
 * @param statsObj statistics object whose decimal statistics are updated
 */
private void unpackDecimalStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) {
  switch (fName) {
    case "countnulls": {
      long nullCount = ((LongObjectInspector) oi).get(o);
      statsObj.getStatsData().getDecimalStats().setNumNulls(nullCount);
      break;
    }
    case "numdistinctvalues": {
      long distinctCount = ((LongObjectInspector) oi).get(o);
      statsObj.getStatsData().getDecimalStats().setNumDVs(distinctCount);
      break;
    }
    case "max": {
      HiveDecimal highest = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
      statsObj.getStatsData().getDecimalStats().setHighValue(convertToThriftDecimal(highest));
      break;
    }
    case "min": {
      HiveDecimal lowest = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
      statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(lowest));
      break;
    }
    case "ndvbitvector": {
      PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) oi;
      String bitVectors = ((StringObjectInspector) primitiveInspector).getPrimitiveJavaObject(o);
      statsObj.getStatsData().getDecimalStats().setBitVectors(bitVectors);
      break;
    }
    default:
      // Not a decimal statistic handled here.
      break;
  }
}
Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project hive by apache.
The class JsonSerDe, method buildJSONString.
/**
 * Appends the JSON rendering of {@code o}, as described by {@code oi}, to {@code sb}.
 *
 * <p>TODO: this code section was copied over from SerDeUtils because the JSON produced there is
 * non-standard; it should use quotes for all field names. Fix it there, then remove this copy.
 * See
 * http://jackson.codehaus.org/1.7.3/javadoc/org/codehaus/jackson/JsonParser.Feature.html#ALLOW_UNQUOTED_FIELD_NAMES
 * for details: trying to make Jackson ignore unquoted names did not seem to work (compilation
 * failure when attempting to use that feature), so the production itself had to be changed.
 *
 * @param sb buffer the JSON text is appended to
 * @param o value to render; {@code null} is rendered as the literal {@code null}
 * @param oi inspector describing {@code o}
 * @throws IOException when a BINARY value is encountered
 */
private static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) throws IOException {
  switch (oi.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) oi;
      if (o == null) {
        sb.append("null");
        break;
      }
      switch (primitiveInspector.getPrimitiveCategory()) {
        case BOOLEAN:
          // StringBuilder renders a boolean as the literal true / false.
          sb.append(((BooleanObjectInspector) primitiveInspector).get(o));
          break;
        case BYTE:
          sb.append(((ByteObjectInspector) primitiveInspector).get(o));
          break;
        case SHORT:
          sb.append(((ShortObjectInspector) primitiveInspector).get(o));
          break;
        case INT:
          sb.append(((IntObjectInspector) primitiveInspector).get(o));
          break;
        case LONG:
          sb.append(((LongObjectInspector) primitiveInspector).get(o));
          break;
        case FLOAT:
          sb.append(((FloatObjectInspector) primitiveInspector).get(o));
          break;
        case DOUBLE:
          sb.append(((DoubleObjectInspector) primitiveInspector).get(o));
          break;
        case STRING: {
          String raw = ((StringObjectInspector) primitiveInspector).getPrimitiveJavaObject(o);
          appendWithQuotes(sb, SerDeUtils.escapeString(raw));
          break;
        }
        case BINARY:
          throw new IOException("JsonSerDe does not support BINARY type");
        case DATE:
          appendWithQuotes(sb, ((DateObjectInspector) primitiveInspector).getPrimitiveJavaObject(o).toString());
          break;
        case TIMESTAMP:
          appendWithQuotes(sb, ((TimestampObjectInspector) primitiveInspector).getPrimitiveJavaObject(o).toString());
          break;
        case DECIMAL:
          sb.append(((HiveDecimalObjectInspector) primitiveInspector).getPrimitiveJavaObject(o));
          break;
        case VARCHAR: {
          String raw = ((HiveVarcharObjectInspector) primitiveInspector).getPrimitiveJavaObject(o).toString();
          appendWithQuotes(sb, SerDeUtils.escapeString(raw));
          break;
        }
        case CHAR: {
          // This should use HiveChar.getPaddedValue() but it's protected; currently (v0.13)
          // HiveChar.toString() returns getPaddedValue().
          String raw = ((HiveCharObjectInspector) primitiveInspector).getPrimitiveJavaObject(o).toString();
          appendWithQuotes(sb, SerDeUtils.escapeString(raw));
          break;
        }
        default:
          throw new RuntimeException("Unknown primitive type: " + primitiveInspector.getPrimitiveCategory());
      }
      break;
    }
    case LIST: {
      ListObjectInspector listInspector = (ListObjectInspector) oi;
      ObjectInspector elementInspector = listInspector.getListElementObjectInspector();
      List<?> elements = listInspector.getList(o);
      if (elements == null) {
        sb.append("null");
        break;
      }
      sb.append(SerDeUtils.LBRACKET);
      for (int idx = 0; idx < elements.size(); idx++) {
        if (idx != 0) {
          sb.append(SerDeUtils.COMMA);
        }
        buildJSONString(sb, elements.get(idx), elementInspector);
      }
      sb.append(SerDeUtils.RBRACKET);
      break;
    }
    case MAP: {
      MapObjectInspector mapInspector = (MapObjectInspector) oi;
      ObjectInspector keyInspector = mapInspector.getMapKeyObjectInspector();
      ObjectInspector valueInspector = mapInspector.getMapValueObjectInspector();
      Map<?, ?> entries = mapInspector.getMap(o);
      if (entries == null) {
        sb.append("null");
        break;
      }
      sb.append(SerDeUtils.LBRACE);
      boolean needComma = false;
      for (Map.Entry<?, ?> entry : entries.entrySet()) {
        if (needComma) {
          sb.append(SerDeUtils.COMMA);
        }
        needComma = true;

        // Render the key on its own so it can be checked for surrounding quotes.
        StringBuilder keyBuffer = new StringBuilder();
        buildJSONString(keyBuffer, entry.getKey(), keyInspector);
        String renderedKey = keyBuffer.toString().trim();
        if (renderedKey.isEmpty() || renderedKey.charAt(0) == SerDeUtils.QUOTE) {
          sb.append(renderedKey);
        } else {
          // Key rendering did not start with a quote: wrap it.
          appendWithQuotes(sb, renderedKey);
        }
        sb.append(SerDeUtils.COLON);
        buildJSONString(sb, entry.getValue(), valueInspector);
      }
      sb.append(SerDeUtils.RBRACE);
      break;
    }
    case STRUCT: {
      StructObjectInspector structInspector = (StructObjectInspector) oi;
      List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
      if (o == null) {
        sb.append("null");
        break;
      }
      sb.append(SerDeUtils.LBRACE);
      for (int idx = 0; idx < fields.size(); idx++) {
        StructField field = fields.get(idx);
        if (idx != 0) {
          sb.append(SerDeUtils.COMMA);
        }
        appendWithQuotes(sb, field.getFieldName());
        sb.append(SerDeUtils.COLON);
        buildJSONString(sb, structInspector.getStructFieldData(o, field), field.getFieldObjectInspector());
      }
      sb.append(SerDeUtils.RBRACE);
      break;
    }
    case UNION: {
      UnionObjectInspector unionInspector = (UnionObjectInspector) oi;
      if (o == null) {
        sb.append("null");
        break;
      }
      // Rendered as a single-entry object whose name is the (unquoted) tag.
      sb.append(SerDeUtils.LBRACE);
      sb.append(unionInspector.getTag(o));
      sb.append(SerDeUtils.COLON);
      buildJSONString(sb, unionInspector.getField(o), unionInspector.getObjectInspectors().get(unionInspector.getTag(o)));
      sb.append(SerDeUtils.RBRACE);
      break;
    }
    default:
      throw new RuntimeException("Unknown type in ObjectInspector!");
  }
}
Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project SQLWindowing by hbutani.
The class WindowingKeySerializer, method serialize.
/*
 * Writes a primitive value to the buffer as a null-marker byte followed by a
 * fixed byte layout per type (copied from BinarySortableSerDe::serialize).
 *
 * A null value is written as the single byte 0; any other value is preceded by
 * the byte 1. Integral types are written high byte first with the sign bit
 * flipped; floats and doubles have all bits flipped when negative and only the
 * sign bit flipped otherwise (presumably so that byte-wise comparison follows
 * numeric order - confirm against BinarySortableSerDe). Strings are written
 * with bytes 0 and 1 escaped and are terminated by a 0 byte.
 *
 * The invert flag is forwarded unchanged to every buffer.write call.
 * Non-primitive categories and unlisted primitive categories raise a
 * RuntimeException.
 */
static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi, boolean invert) {
  // Marker byte: 0 for a null field, 1 for a present one.
  if (o == null) {
    buffer.write((byte) 0, invert);
    return;
  }
  buffer.write((byte) 1, invert);

  switch (oi.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) oi;
      switch (primitiveInspector.getPrimitiveCategory()) {
        case VOID:
          return;
        case BOOLEAN: {
          boolean flag = ((BooleanObjectInspector) primitiveInspector).get(o);
          buffer.write(flag ? (byte) 2 : (byte) 1, invert);
          return;
        }
        case BYTE: {
          byte value = ((ByteObjectInspector) primitiveInspector).get(o);
          buffer.write((byte) (value ^ 0x80), invert);
          return;
        }
        case SHORT: {
          short value = ((ShortObjectInspector) primitiveInspector).get(o);
          buffer.write((byte) ((value >> 8) ^ 0x80), invert);
          buffer.write((byte) value, invert);
          return;
        }
        case INT: {
          // Flip the sign bit once, then emit the four bytes high to low.
          int bits = ((IntObjectInspector) primitiveInspector).get(o) ^ Integer.MIN_VALUE;
          for (int shift = 24; shift >= 0; shift -= 8) {
            buffer.write((byte) (bits >> shift), invert);
          }
          return;
        }
        case LONG: {
          // Flip the sign bit once, then emit the eight bytes high to low.
          long bits = ((LongObjectInspector) primitiveInspector).get(o) ^ Long.MIN_VALUE;
          for (int shift = 56; shift >= 0; shift -= 8) {
            buffer.write((byte) (bits >> shift), invert);
          }
          return;
        }
        case FLOAT: {
          int bits = Float.floatToIntBits(((FloatObjectInspector) primitiveInspector).get(o));
          // Sign bit set: flip every bit. Sign bit clear: flip only the sign bit.
          bits = (bits < 0) ? ~bits : (bits ^ Integer.MIN_VALUE);
          for (int shift = 24; shift >= 0; shift -= 8) {
            buffer.write((byte) (bits >> shift), invert);
          }
          return;
        }
        case DOUBLE: {
          long bits = Double.doubleToLongBits(((DoubleObjectInspector) primitiveInspector).get(o));
          // Sign bit set: flip every bit. Sign bit clear: flip only the sign bit.
          bits = (bits < 0) ? ~bits : (bits ^ Long.MIN_VALUE);
          for (int shift = 56; shift >= 0; shift -= 8) {
            buffer.write((byte) (bits >> shift), invert);
          }
          return;
        }
        case STRING: {
          Text text = ((StringObjectInspector) primitiveInspector).getPrimitiveWritableObject(o);
          byte[] bytes = text.getBytes();
          int used = text.getLength();
          for (int pos = 0; pos < used; pos++) {
            byte current = bytes[pos];
            if (current != 0 && current != 1) {
              buffer.write(current, invert);
            } else {
              // Bytes 0 and 1 are escaped as the pair (1, byte + 1).
              buffer.write((byte) 1, invert);
              buffer.write((byte) (current + 1), invert);
            }
          }
          // Terminator.
          buffer.write((byte) 0, invert);
          return;
        }
        default:
          throw new RuntimeException("Unrecognized type: " + primitiveInspector.getPrimitiveCategory());
      }
    }
    default:
      throw new RuntimeException("Unsupported type in WindowingKey : " + oi.getCategory());
  }
}
Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project hive by apache.
The class LazyBinarySerDe, method serialize.
/**
 * Recursively serializes an object to a byte stream, driven by its object
 * inspector.
 *
 * <p>A null {@code obj} writes nothing. Primitive values are written directly.
 * Lists, maps, structs and unions are written recursively; unless
 * {@code skipLengthPrefix} is set, they are preceded by a reserved four-byte
 * slot that is filled in with the byte size of the payload once it has been
 * written. List elements and map keys/values are always serialized with
 * {@code skipLengthPrefix == false}.
 *
 * @param byteStream
 *          the byte stream storing the serialization data
 * @param obj
 *          the object to serialize
 * @param objInspector
 *          the object inspector
 * @param skipLengthPrefix if true, the length prefix of a list/map/struct/union
 *          or of a binary value is not written; the flag is also forwarded to
 *          serializeText for STRING/CHAR/VARCHAR values
 * @param warnedOnceNullMapKey may be null; when non-null, its value is set to
 *          true whenever a null map key is seen, and the warning is logged only
 *          while the value is still false
 * @throws SerDeException not thrown directly in this body; presumably raised by
 *          the helpers it calls
 * @throws RuntimeException for an unrecognized category or primitive category,
 *          or for a zero-length binary value when skipLengthPrefix is true
 */
public static void serialize(RandomAccessOutput byteStream, Object obj, ObjectInspector objInspector, boolean skipLengthPrefix, BooleanRef warnedOnceNullMapKey) throws SerDeException {
// do nothing for null object
if (null == obj) {
return;
}
switch(objInspector.getCategory()) {
case PRIMITIVE:
{
// Every branch of the inner switch returns or throws, so control never
// falls through into the LIST case below.
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
switch(poi.getPrimitiveCategory()) {
case VOID:
{
return;
}
case BOOLEAN:
{
// a single byte: 1 for true, 0 for false
boolean v = ((BooleanObjectInspector) poi).get(obj);
byteStream.write((byte) (v ? 1 : 0));
return;
}
case BYTE:
{
ByteObjectInspector boi = (ByteObjectInspector) poi;
byte v = boi.get(obj);
byteStream.write(v);
return;
}
case SHORT:
{
// two bytes, high byte first
ShortObjectInspector spoi = (ShortObjectInspector) poi;
short v = spoi.get(obj);
byteStream.write((byte) (v >> 8));
byteStream.write((byte) (v));
return;
}
case INT:
{
IntObjectInspector ioi = (IntObjectInspector) poi;
int v = ioi.get(obj);
LazyBinaryUtils.writeVInt(byteStream, v);
return;
}
case LONG:
{
LongObjectInspector loi = (LongObjectInspector) poi;
long v = loi.get(obj);
LazyBinaryUtils.writeVLong(byteStream, v);
return;
}
case FLOAT:
{
// the four bytes of the float's bit pattern, high byte first
FloatObjectInspector foi = (FloatObjectInspector) poi;
int v = Float.floatToIntBits(foi.get(obj));
byteStream.write((byte) (v >> 24));
byteStream.write((byte) (v >> 16));
byteStream.write((byte) (v >> 8));
byteStream.write((byte) (v));
return;
}
case DOUBLE:
{
DoubleObjectInspector doi = (DoubleObjectInspector) poi;
LazyBinaryUtils.writeDouble(byteStream, doi.get(obj));
return;
}
case STRING:
{
StringObjectInspector soi = (StringObjectInspector) poi;
Text t = soi.getPrimitiveWritableObject(obj);
serializeText(byteStream, t, skipLengthPrefix);
return;
}
case CHAR:
{
// CHAR is written through the same text path as STRING
HiveCharObjectInspector hcoi = (HiveCharObjectInspector) poi;
Text t = hcoi.getPrimitiveWritableObject(obj).getTextValue();
serializeText(byteStream, t, skipLengthPrefix);
return;
}
case VARCHAR:
{
// VARCHAR is written through the same text path as STRING
HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi;
Text t = hcoi.getPrimitiveWritableObject(obj).getTextValue();
serializeText(byteStream, t, skipLengthPrefix);
return;
}
case BINARY:
{
BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
BytesWritable bw = baoi.getPrimitiveWritableObject(obj);
int length = bw.getLength();
if (!skipLengthPrefix) {
LazyBinaryUtils.writeVInt(byteStream, length);
} else {
// without a length prefix, a non-null zero-length value is rejected
if (length == 0) {
throw new RuntimeException("LazyBinaryColumnarSerde cannot serialize a non-null zero " + "length binary field. Consider using either LazyBinarySerde or ColumnarSerde.");
}
}
// only the first 'length' bytes of the backing array are written
byteStream.write(bw.getBytes(), 0, length);
return;
}
case DATE:
{
DateWritable d = ((DateObjectInspector) poi).getPrimitiveWritableObject(obj);
writeDateToByteStream(byteStream, d);
return;
}
case TIMESTAMP:
{
TimestampObjectInspector toi = (TimestampObjectInspector) poi;
TimestampWritable t = toi.getPrimitiveWritableObject(obj);
t.writeToByteStream(byteStream);
return;
}
case INTERVAL_YEAR_MONTH:
{
HiveIntervalYearMonthWritable intervalYearMonth = ((HiveIntervalYearMonthObjectInspector) poi).getPrimitiveWritableObject(obj);
intervalYearMonth.writeToByteStream(byteStream);
return;
}
case INTERVAL_DAY_TIME:
{
HiveIntervalDayTimeWritable intervalDayTime = ((HiveIntervalDayTimeObjectInspector) poi).getPrimitiveWritableObject(obj);
intervalDayTime.writeToByteStream(byteStream);
return;
}
case DECIMAL:
{
HiveDecimalObjectInspector bdoi = (HiveDecimalObjectInspector) poi;
HiveDecimalWritable t = bdoi.getPrimitiveWritableObject(obj);
// a null writable writes nothing, like a null obj
if (t == null) {
return;
}
writeToByteStream(byteStream, t);
return;
}
default:
{
throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
}
}
}
case LIST:
{
ListObjectInspector loi = (ListObjectInspector) objInspector;
ObjectInspector eoi = loi.getListElementObjectInspector();
int byteSizeStart = 0;
int listStart = 0;
if (!skipLengthPrefix) {
// 1/ reserve spaces for the byte size of the list
// which is a integer and takes four bytes
byteSizeStart = byteStream.getLength();
byteStream.reserve(4);
listStart = byteStream.getLength();
}
// 2/ write the size of the list as a VInt
int size = loi.getListLength(obj);
LazyBinaryUtils.writeVInt(byteStream, size);
// 3/ write the null bytes
// one bit per element, eight elements per byte, lowest bit first
byte nullByte = 0;
for (int eid = 0; eid < size; eid++) {
// set the bit to 1 if an element is not null
if (null != loi.getListElement(obj, eid)) {
nullByte |= 1 << (eid % 8);
}
// flush the byte once it holds eight bits, or
// if this is the last element
if (7 == eid % 8 || eid == size - 1) {
byteStream.write(nullByte);
nullByte = 0;
}
}
// 4/ write element by element from the list
// (null elements write nothing; they are recorded only in the null bytes)
for (int eid = 0; eid < size; eid++) {
serialize(byteStream, loi.getListElement(obj, eid), eoi, false, warnedOnceNullMapKey);
}
if (!skipLengthPrefix) {
// 5/ update the list byte size
// (the size covers everything written after the reserved four bytes)
int listEnd = byteStream.getLength();
int listSize = listEnd - listStart;
writeSizeAtOffset(byteStream, byteSizeStart, listSize);
}
return;
}
case MAP:
{
MapObjectInspector moi = (MapObjectInspector) objInspector;
ObjectInspector koi = moi.getMapKeyObjectInspector();
ObjectInspector voi = moi.getMapValueObjectInspector();
Map<?, ?> map = moi.getMap(obj);
int byteSizeStart = 0;
int mapStart = 0;
if (!skipLengthPrefix) {
// 1/ reserve spaces for the byte size of the map
// which is a integer and takes four bytes
byteSizeStart = byteStream.getLength();
byteStream.reserve(4);
mapStart = byteStream.getLength();
}
// 2/ write the size of the map which is a VInt
int size = map.size();
LazyBinaryUtils.writeVInt(byteStream, size);
// 3/ write the null bytes
// two bits per entry (key first, then value), so four entries per byte;
// b counts the bits emitted so far
int b = 0;
byte nullByte = 0;
for (Map.Entry<?, ?> entry : map.entrySet()) {
// set the bit to 1 if a key is not null
if (null != entry.getKey()) {
nullByte |= 1 << (b % 8);
} else if (warnedOnceNullMapKey != null) {
// log the null-key warning only while the flag is still false
if (!warnedOnceNullMapKey.value) {
LOG.warn("Null map key encountered! Ignoring similar problems.");
}
warnedOnceNullMapKey.value = true;
}
b++;
// set the bit to 1 if a value is not null
if (null != entry.getValue()) {
nullByte |= 1 << (b % 8);
}
b++;
// write the byte to stream every 4 key-value pairs
// or if this is the last key-value pair
if (0 == b % 8 || b == size * 2) {
byteStream.write(nullByte);
nullByte = 0;
}
}
// 4/ write key-value pairs one by one
// (iterates the same entry set a second time, key then value for each entry)
for (Map.Entry<?, ?> entry : map.entrySet()) {
serialize(byteStream, entry.getKey(), koi, false, warnedOnceNullMapKey);
serialize(byteStream, entry.getValue(), voi, false, warnedOnceNullMapKey);
}
if (!skipLengthPrefix) {
// 5/ update the byte size of the map
int mapEnd = byteStream.getLength();
int mapSize = mapEnd - mapStart;
writeSizeAtOffset(byteStream, byteSizeStart, mapSize);
}
return;
}
// STRUCT and UNION share the length-prefix handling and differ only in the
// helper that writes the payload.
case STRUCT:
case UNION:
{
int byteSizeStart = 0;
int typeStart = 0;
if (!skipLengthPrefix) {
// 1/ reserve spaces for the byte size of the struct
// which is a integer and takes four bytes
byteSizeStart = byteStream.getLength();
byteStream.reserve(4);
typeStart = byteStream.getLength();
}
if (ObjectInspector.Category.STRUCT.equals(objInspector.getCategory())) {
// 2/ serialize the struct
serializeStruct(byteStream, obj, (StructObjectInspector) objInspector, warnedOnceNullMapKey);
} else {
// 2/ serialize the union
serializeUnion(byteStream, obj, (UnionObjectInspector) objInspector, warnedOnceNullMapKey);
}
if (!skipLengthPrefix) {
// 3/ update the byte size of the struct
int typeEnd = byteStream.getLength();
int typeSize = typeEnd - typeStart;
writeSizeAtOffset(byteStream, byteSizeStart, typeSize);
}
return;
}
default:
{
throw new RuntimeException("Unrecognized type: " + objInspector.getCategory());
}
}
}
Aggregations