Use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project presto-hive-apache by prestodb.
The class JsonSerDe, method extractCurrentField:
/**
 * Utility method to extract the current expected field from the given JsonParser.
 *
 * isTokenCurrent is a boolean flag, also passed in, indicating whether the JsonParser
 * is already positioned at the token we expect to read next, or needs to be advanced
 * to the next token before we read.
 */
private Object extractCurrentField(JsonParser p, HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException {
    Object val = null;
    JsonToken valueToken;
    if (isTokenCurrent) {
        valueToken = p.getCurrentToken();
    } else {
        valueToken = p.nextToken();
    }
    switch (hcatFieldSchema.getType()) {
        case INT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue();
            break;
        case TINYINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue();
            break;
        case SMALLINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue();
            break;
        case BIGINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue();
            break;
        case BOOLEAN:
            String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            if (bval != null) {
                val = Boolean.valueOf(bval);
            } else {
                val = null;
            }
            break;
        case FLOAT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue();
            break;
        case DOUBLE:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue();
            break;
        case STRING:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            break;
        case BINARY:
            throw new IOException("JsonSerDe does not support BINARY type");
        case DATE:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : Date.valueOf(p.getText());
            break;
        case TIMESTAMP:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : tsParser.parseTimestamp(p.getText());
            break;
        case DECIMAL:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : HiveDecimal.create(p.getText());
            break;
        case VARCHAR:
            int vLen = ((BaseCharTypeInfo) hcatFieldSchema.getTypeInfo()).getLength();
            val = (valueToken == JsonToken.VALUE_NULL) ? null : new HiveVarchar(p.getText(), vLen);
            break;
        case CHAR:
            int cLen = ((BaseCharTypeInfo) hcatFieldSchema.getTypeInfo()).getLength();
            val = (valueToken == JsonToken.VALUE_NULL) ? null : new HiveChar(p.getText(), cLen);
            break;
        case ARRAY:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_ARRAY) {
                throw new IOException("Start of Array expected");
            }
            List<Object> arr = new ArrayList<Object>();
            while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) {
                arr.add(extractCurrentField(p, hcatFieldSchema.getArrayElementSchema().get(0), true));
            }
            val = arr;
            break;
        case MAP:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_OBJECT) {
                throw new IOException("Start of Object expected");
            }
            Map<Object, Object> map = new LinkedHashMap<Object, Object>();
            HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0);
            while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
                Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), hcatFieldSchema.getMapKeyTypeInfo());
                Object v = extractCurrentField(p, valueSchema, false);
                map.put(k, v);
            }
            val = map;
            break;
        case STRUCT:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_OBJECT) {
                throw new IOException("Start of Object expected");
            }
            HCatSchema subSchema = hcatFieldSchema.getStructSubSchema();
            int sz = subSchema.getFieldNames().size();
            List<Object> struct = new ArrayList<Object>(Collections.nCopies(sz, null));
            while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
                populateRecord(struct, valueToken, p, subSchema);
            }
            val = struct;
            break;
        default:
            LOG.error("Unknown type found: " + hcatFieldSchema.getType());
            return null;
    }
    return val;
}
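The VARCHAR and CHAR branches above read the declared length out of BaseCharTypeInfo and let HiveVarchar and HiveChar enforce it on the parsed text. A minimal standalone sketch of that enforcement, using a varchar(5) built with TypeInfoFactory (the literal value is illustrative, not taken from the SerDe):

import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class CharLengthSketch {
    public static void main(String[] args) {
        // varchar(5): getLength() returns the declared maximum of 5.
        BaseCharTypeInfo varcharInfo = TypeInfoFactory.getVarcharTypeInfo(5);
        // HiveVarchar truncates its value to the supplied maximum length.
        HiveVarchar value = new HiveVarchar("truncate me", varcharInfo.getLength());
        System.out.println(value.getValue()); // expected to print "trunc"
    }
}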
Use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project flink by apache.
The class HiveParserTypeConverter, method convert:
public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) {
    RelDataType convertedType = null;
    HiveShim hiveShim = HiveParserUtils.getSessionHiveShim();
    switch (type.getPrimitiveCategory()) {
        case VOID:
            convertedType = dtFactory.createSqlType(SqlTypeName.NULL);
            break;
        case BOOLEAN:
            convertedType = dtFactory.createSqlType(SqlTypeName.BOOLEAN);
            break;
        case BYTE:
            convertedType = dtFactory.createSqlType(SqlTypeName.TINYINT);
            break;
        case SHORT:
            convertedType = dtFactory.createSqlType(SqlTypeName.SMALLINT);
            break;
        case INT:
            convertedType = dtFactory.createSqlType(SqlTypeName.INTEGER);
            break;
        case LONG:
            convertedType = dtFactory.createSqlType(SqlTypeName.BIGINT);
            break;
        case FLOAT:
            convertedType = dtFactory.createSqlType(SqlTypeName.FLOAT);
            break;
        case DOUBLE:
            convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE);
            break;
        case STRING:
            convertedType = dtFactory.createTypeWithCharsetAndCollation(dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            break;
        case DATE:
            convertedType = dtFactory.createSqlType(SqlTypeName.DATE);
            break;
        case TIMESTAMP:
            convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP, 9);
            break;
        case BINARY:
            convertedType = dtFactory.createSqlType(SqlTypeName.BINARY);
            break;
        case DECIMAL:
            DecimalTypeInfo dtInf = (DecimalTypeInfo) type;
            convertedType = dtFactory.createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale());
            break;
        case VARCHAR:
            convertedType = dtFactory.createTypeWithCharsetAndCollation(dtFactory.createSqlType(SqlTypeName.VARCHAR, ((BaseCharTypeInfo) type).getLength()), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            break;
        case CHAR:
            convertedType = dtFactory.createTypeWithCharsetAndCollation(dtFactory.createSqlType(SqlTypeName.CHAR, ((BaseCharTypeInfo) type).getLength()), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            break;
        case UNKNOWN:
            convertedType = dtFactory.createSqlType(SqlTypeName.OTHER);
            break;
        default:
            if (hiveShim.isIntervalYearMonthType(type.getPrimitiveCategory())) {
                convertedType = dtFactory.createSqlIntervalType(new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1)));
            } else if (hiveShim.isIntervalDayTimeType(type.getPrimitiveCategory())) {
                convertedType = dtFactory.createSqlIntervalType(new SqlIntervalQualifier(TimeUnit.DAY, TimeUnit.SECOND, new SqlParserPos(1, 1)));
            }
    }
    if (null == convertedType) {
        throw new RuntimeException("Unsupported Type : " + type.getTypeName());
    }
    return dtFactory.createTypeWithNullability(convertedType, true);
}
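The VARCHAR and CHAR branches cast to BaseCharTypeInfo because it is the common parent of CharTypeInfo and VarcharTypeInfo and is what carries getLength(). A minimal sketch of the same Calcite-side construction in isolation, assuming Calcite's SqlTypeFactoryImpl with RelDataTypeSystem.DEFAULT as the type factory (the Flink planner supplies its own factory):

import java.nio.charset.Charset;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.sql.SqlCollation;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.ConversionUtil;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class CharToCalciteSketch {
    public static void main(String[] args) {
        RelDataTypeFactory dtFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
        // The cast to BaseCharTypeInfo works for both char(n) and varchar(n),
        // since CharTypeInfo and VarcharTypeInfo share that parent class.
        BaseCharTypeInfo hiveType = (BaseCharTypeInfo) TypeInfoFactory.getVarcharTypeInfo(20);
        RelDataType calciteType = dtFactory.createTypeWithCharsetAndCollation(
                dtFactory.createSqlType(SqlTypeName.VARCHAR, hiveType.getLength()),
                Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME),
                SqlCollation.IMPLICIT);
        // Prints the VARCHAR(20) type with the native UTF-16 charset and implicit collation.
        System.out.println(calciteType.getFullTypeString());
    }
}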
Use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project hive by apache.
The class VectorizationContext, method instantiateExpression:
public VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnTypeInfo, DataTypePhysicalVariation returnDataTypePhysicalVariation, Object... args) throws HiveException {
    VectorExpression ve = null;
    Constructor<?> ctor = getConstructor(vclass);
    int numParams = ctor.getParameterTypes().length;
    int argsLength = (args == null) ? 0 : args.length;
    if (numParams == 0) {
        try {
            ve = (VectorExpression) ctor.newInstance();
        } catch (Exception ex) {
            throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + getStackTraceAsSingleLine(ex));
        }
    } else if (numParams == argsLength) {
        try {
            ve = (VectorExpression) ctor.newInstance(args);
        } catch (Exception ex) {
            throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " + getStackTraceAsSingleLine(ex));
        }
    } else if (numParams == argsLength + 1) {
        // An additional argument is needed, which is the output column.
        Object[] newArgs = null;
        try {
            if (returnTypeInfo == null) {
                throw new HiveException("Missing output type information");
            }
            String returnTypeName = returnTypeInfo.getTypeName();
            // Special handling for decimal because decimal types need scale and precision parameters.
            // This special handling should be avoided by using returnType uniformly for all cases.
            final int outputColumnNum = ocm.allocateOutputColumn(returnTypeInfo, returnDataTypePhysicalVariation);
            newArgs = Arrays.copyOf(Objects.requireNonNull(args), numParams);
            newArgs[numParams - 1] = outputColumnNum;
            ve = (VectorExpression) ctor.newInstance(newArgs);
            /*
             * Caller is responsible for setting children and input type information.
             */
            ve.setOutputTypeInfo(returnTypeInfo);
            ve.setOutputDataTypePhysicalVariation(returnDataTypePhysicalVariation);
        } catch (Exception ex) {
            throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + getStackTraceAsSingleLine(ex));
        }
    }
    // Add the maxLength parameter to UDFs that have CHAR or VARCHAR output.
    if (ve instanceof TruncStringOutput) {
        TruncStringOutput truncStringOutput = (TruncStringOutput) ve;
        if (returnTypeInfo instanceof BaseCharTypeInfo) {
            BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) returnTypeInfo;
            truncStringOutput.setMaxLength(baseCharTypeInfo.getLength());
        }
    }
    return ve;
}
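The trailing block propagates the declared char/varchar length from the return type into any expression that implements TruncStringOutput. A minimal sketch of just the length-extraction part of that check, assuming type strings parsed with TypeInfoUtils.getTypeInfoFromTypeString (the type strings are illustrative):

import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class MaxLengthSketch {
    public static void main(String[] args) {
        for (String typeString : new String[] {"char(3)", "varchar(50)", "string"}) {
            TypeInfo returnTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
            if (returnTypeInfo instanceof BaseCharTypeInfo) {
                // char(n) and varchar(n) carry a declared maximum length...
                int maxLength = ((BaseCharTypeInfo) returnTypeInfo).getLength();
                System.out.println(typeString + " -> maxLength " + maxLength);
            } else {
                // ...while string has no such bound, so no truncation would be configured.
                System.out.println(typeString + " -> no maxLength");
            }
        }
    }
}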
Use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project hive by apache.
The class TypeConverter, method convert:
public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) {
    RelDataType convertedType = null;
    switch (type.getPrimitiveCategory()) {
        case VOID:
            convertedType = dtFactory.createSqlType(SqlTypeName.NULL);
            break;
        case BOOLEAN:
            convertedType = dtFactory.createSqlType(SqlTypeName.BOOLEAN);
            break;
        case BYTE:
            convertedType = dtFactory.createSqlType(SqlTypeName.TINYINT);
            break;
        case SHORT:
            convertedType = dtFactory.createSqlType(SqlTypeName.SMALLINT);
            break;
        case INT:
            convertedType = dtFactory.createSqlType(SqlTypeName.INTEGER);
            break;
        case LONG:
            convertedType = dtFactory.createSqlType(SqlTypeName.BIGINT);
            break;
        case FLOAT:
            convertedType = dtFactory.createSqlType(SqlTypeName.FLOAT);
            break;
        case DOUBLE:
            convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE);
            break;
        case STRING:
            convertedType = dtFactory.createTypeWithCharsetAndCollation(dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            break;
        case DATE:
            convertedType = dtFactory.createSqlType(SqlTypeName.DATE);
            break;
        case TIMESTAMP:
            convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP);
            break;
        case TIMESTAMPLOCALTZ:
            convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE);
            break;
        case INTERVAL_YEAR_MONTH:
            convertedType = dtFactory.createSqlIntervalType(new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1)));
            break;
        case INTERVAL_DAY_TIME:
            convertedType = dtFactory.createSqlIntervalType(new SqlIntervalQualifier(TimeUnit.DAY, TimeUnit.SECOND, new SqlParserPos(1, 1)));
            break;
        case BINARY:
            convertedType = dtFactory.createSqlType(SqlTypeName.BINARY);
            break;
        case DECIMAL:
            DecimalTypeInfo dtInf = (DecimalTypeInfo) type;
            convertedType = dtFactory.createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale());
            break;
        case VARCHAR:
            convertedType = dtFactory.createTypeWithCharsetAndCollation(dtFactory.createSqlType(SqlTypeName.VARCHAR, ((BaseCharTypeInfo) type).getLength()), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            break;
        case CHAR:
            convertedType = dtFactory.createTypeWithCharsetAndCollation(dtFactory.createSqlType(SqlTypeName.CHAR, ((BaseCharTypeInfo) type).getLength()), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            break;
        case UNKNOWN:
            convertedType = dtFactory.createSqlType(SqlTypeName.OTHER);
            break;
    }
    if (null == convertedType) {
        throw new RuntimeException("Unsupported Type : " + type.getTypeName());
    }
    return dtFactory.createTypeWithNullability(convertedType, true);
}
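This Hive copy differs from the Flink copy above mainly in handling TIMESTAMPLOCALTZ and the interval categories directly rather than through a shim. DECIMAL is the other parameterized case alongside CHAR and VARCHAR; a small sketch of that branch under the same SqlTypeFactoryImpl assumption:

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class DecimalToCalciteSketch {
    public static void main(String[] args) {
        RelDataTypeFactory dtFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
        // decimal(10,2): precision and scale come from the parameterized TypeInfo.
        DecimalTypeInfo dtInf = TypeInfoFactory.getDecimalTypeInfo(10, 2);
        RelDataType decimalType = dtFactory.createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale());
        // Hive columns are nullable by default, so the converter wraps the result accordingly.
        RelDataType nullable = dtFactory.createTypeWithNullability(decimalType, true);
        System.out.println(nullable.getFullTypeString()); // e.g. DECIMAL(10, 2)
    }
}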
Use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project hive by apache.
The class GenericUDFLower, method initialize:
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 1) {
        throw new UDFArgumentLengthException("LOWER requires 1 argument, got " + arguments.length);
    }
    if (arguments[0].getCategory() != Category.PRIMITIVE) {
        throw new UDFArgumentException("LOWER only takes primitive types, got " + arguments[0].getTypeName());
    }
    argumentOI = (PrimitiveObjectInspector) arguments[0];
    stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI);
    PrimitiveCategory inputType = argumentOI.getPrimitiveCategory();
    ObjectInspector outputOI = null;
    BaseCharTypeInfo typeInfo;
    switch (inputType) {
        case CHAR:
            // return type should have same length as the input.
            returnType = inputType;
            typeInfo = TypeInfoFactory.getCharTypeInfo(GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
            outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
            break;
        case VARCHAR:
            // return type should have same length as the input.
            returnType = inputType;
            typeInfo = TypeInfoFactory.getVarcharTypeInfo(GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
            outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
            break;
        default:
            returnType = PrimitiveCategory.STRING;
            outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
            break;
    }
    returnHelper = new GenericUDFUtils.StringHelper(returnType);
    return outputOI;
}
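Because lowercasing never lengthens a value, the CHAR and VARCHAR branches reuse the input's fixed size when building the writable output inspector. A minimal sketch of constructing such a length-preserving inspector, assuming a fixed varchar(10) instead of a length derived from an actual argument ObjectInspector:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class LowerOutputOISketch {
    public static void main(String[] args) {
        // varchar(10) in, varchar(10) out: the output inspector carries the same declared length.
        BaseCharTypeInfo typeInfo = TypeInfoFactory.getVarcharTypeInfo(10);
        ObjectInspector outputOI =
                PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
        System.out.println(outputOI.getTypeName()); // expected: varchar(10)
    }
}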