use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project hive by apache.
the class JsonSerDe method extractCurrentField.
/**
* Utility method to extract current expected field from given JsonParser
*
* isTokenCurrent is a boolean variable also passed in, which determines
* if the JsonParser is already at the token we expect to read next, or
* needs advancing to the next before we read.
*/
private Object extractCurrentField(JsonParser p, HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException {
Object val = null;
JsonToken valueToken;
if (isTokenCurrent) {
valueToken = p.getCurrentToken();
} else {
valueToken = p.nextToken();
}
switch(hcatFieldSchema.getType()) {
case INT:
val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue();
break;
case TINYINT:
val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue();
break;
case SMALLINT:
val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue();
break;
case BIGINT:
val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue();
break;
case BOOLEAN:
String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
if (bval != null) {
val = Boolean.valueOf(bval);
} else {
val = null;
}
break;
case FLOAT:
val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue();
break;
case DOUBLE:
val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue();
break;
case STRING:
val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
break;
case BINARY:
String b = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
if (b != null) {
try {
String t = Text.decode(b.getBytes(), 0, b.getBytes().length);
return t.getBytes();
} catch (CharacterCodingException e) {
LOG.warn("Error generating json binary type from object.", e);
return null;
}
} else {
val = null;
}
break;
case DATE:
val = (valueToken == JsonToken.VALUE_NULL) ? null : Date.valueOf(p.getText());
break;
case TIMESTAMP:
val = (valueToken == JsonToken.VALUE_NULL) ? null : tsParser.parseTimestamp(p.getText());
break;
case DECIMAL:
val = (valueToken == JsonToken.VALUE_NULL) ? null : HiveDecimal.create(p.getText());
break;
case VARCHAR:
int vLen = ((BaseCharTypeInfo) hcatFieldSchema.getTypeInfo()).getLength();
val = (valueToken == JsonToken.VALUE_NULL) ? null : new HiveVarchar(p.getText(), vLen);
break;
case CHAR:
int cLen = ((BaseCharTypeInfo) hcatFieldSchema.getTypeInfo()).getLength();
val = (valueToken == JsonToken.VALUE_NULL) ? null : new HiveChar(p.getText(), cLen);
break;
case ARRAY:
if (valueToken == JsonToken.VALUE_NULL) {
val = null;
break;
}
if (valueToken != JsonToken.START_ARRAY) {
throw new IOException("Start of Array expected");
}
List<Object> arr = new ArrayList<Object>();
while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) {
arr.add(extractCurrentField(p, hcatFieldSchema.getArrayElementSchema().get(0), true));
}
val = arr;
break;
case MAP:
if (valueToken == JsonToken.VALUE_NULL) {
val = null;
break;
}
if (valueToken != JsonToken.START_OBJECT) {
throw new IOException("Start of Object expected");
}
Map<Object, Object> map = new LinkedHashMap<Object, Object>();
HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0);
while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), hcatFieldSchema.getMapKeyTypeInfo());
Object v = extractCurrentField(p, valueSchema, false);
map.put(k, v);
}
val = map;
break;
case STRUCT:
if (valueToken == JsonToken.VALUE_NULL) {
val = null;
break;
}
if (valueToken != JsonToken.START_OBJECT) {
throw new IOException("Start of Object expected");
}
HCatSchema subSchema = hcatFieldSchema.getStructSubSchema();
int sz = subSchema.getFieldNames().size();
List<Object> struct = new ArrayList<Object>(Collections.nCopies(sz, null));
while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
populateRecord(struct, valueToken, p, subSchema);
}
val = struct;
break;
default:
LOG.error("Unknown type found: " + hcatFieldSchema.getType());
return null;
}
return val;
}
use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project hive by apache.
the class GenericUDFConcat method initialize.
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
// Loop through all the inputs to determine the appropriate return type/length.
// Return type:
// All CHAR inputs: return CHAR
// All VARCHAR inputs: return VARCHAR
// All CHAR/VARCHAR inputs: return VARCHAR
// All BINARY inputs: return BINARY
// Otherwise return STRING
argumentOIs = arguments;
PrimitiveCategory currentCategory;
PrimitiveObjectInspector poi;
boolean fixedLengthReturnValue = true;
// Only for char/varchar return types
int returnLength = 0;
for (int idx = 0; idx < arguments.length; ++idx) {
if (arguments[idx].getCategory() != Category.PRIMITIVE) {
throw new UDFArgumentException("CONCAT only takes primitive arguments");
}
poi = (PrimitiveObjectInspector) arguments[idx];
currentCategory = poi.getPrimitiveCategory();
if (idx == 0) {
returnType = currentCategory;
}
switch(currentCategory) {
case BINARY:
fixedLengthReturnValue = false;
if (returnType != currentCategory) {
// mix of binary/non-binary args
returnType = PrimitiveCategory.STRING;
}
break;
case CHAR:
case VARCHAR:
if (!fixedLengthReturnValue) {
returnType = PrimitiveCategory.STRING;
}
if (fixedLengthReturnValue && currentCategory == PrimitiveCategory.VARCHAR) {
returnType = PrimitiveCategory.VARCHAR;
}
break;
default:
returnType = PrimitiveCategory.STRING;
fixedLengthReturnValue = false;
break;
}
// max length for the char/varchar, then the return type reverts to string.
if (fixedLengthReturnValue) {
returnLength += GenericUDFUtils.StringHelper.getFixedStringSizeForType(poi);
if ((returnType == PrimitiveCategory.VARCHAR && returnLength > HiveVarchar.MAX_VARCHAR_LENGTH) || (returnType == PrimitiveCategory.CHAR && returnLength > HiveChar.MAX_CHAR_LENGTH)) {
returnType = PrimitiveCategory.STRING;
fixedLengthReturnValue = false;
}
}
}
if (returnType == PrimitiveCategory.BINARY) {
bw = new BytesWritable[arguments.length];
return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
} else {
// treat all inputs as string, the return value will be converted to the appropriate type.
createStringConverters();
returnHelper = new GenericUDFUtils.StringHelper(returnType);
BaseCharTypeInfo typeInfo;
switch(returnType) {
case STRING:
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
case CHAR:
typeInfo = TypeInfoFactory.getCharTypeInfo(returnLength);
return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
case VARCHAR:
typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength);
return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
default:
throw new UDFArgumentException("Unexpected CONCAT return type of " + returnType);
}
}
}
Aggregations