Use of org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo in project hive by apache.
The class HCatBaseStorer, method getJavaObj.
/**
* Converts a Pig value object to a Hive value object.
* This method assumes that {@link #validateSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema, org.apache.hive.hcatalog.data.schema.HCatFieldSchema, org.apache.pig.impl.logicalLayer.schema.Schema, org.apache.hive.hcatalog.data.schema.HCatSchema, int)},
* which checks that the types in the Pig schema are compatible with the target Hive table, has already been called.
*/
private Object getJavaObj(Object pigObj, HCatFieldSchema hcatFS) throws HCatException, BackendException {
try {
if (pigObj == null)
return null;
// The real work-horse. Spend time and energy in this method if there is
// need to keep HCatStorer lean and go fast.
Type type = hcatFS.getType();
switch(type) {
case BINARY:
return ((DataByteArray) pigObj).get();
case STRUCT:
HCatSchema structSubSchema = hcatFS.getStructSubSchema();
// Unwrap the tuple.
List<Object> all = ((Tuple) pigObj).getAll();
ArrayList<Object> converted = new ArrayList<Object>(all.size());
for (int i = 0; i < all.size(); i++) {
converted.add(getJavaObj(all.get(i), structSubSchema.get(i)));
}
return converted;
case ARRAY:
// Unwrap the bag.
DataBag pigBag = (DataBag) pigObj;
HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0);
boolean needTuple = tupFS.getType() == Type.STRUCT;
List<Object> bagContents = new ArrayList<Object>((int) pigBag.size());
Iterator<Tuple> bagItr = pigBag.iterator();
while (bagItr.hasNext()) {
// If there is only one element in tuple contained in bag, we throw away the tuple.
bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS));
}
return bagContents;
case MAP:
Map<?, ?> pigMap = (Map<?, ?>) pigObj;
Map<Object, Object> typeMap = new HashMap<Object, Object>();
for (Entry<?, ?> entry : pigMap.entrySet()) {
// the value has a schema and not a FieldSchema
// Schema validation enforces that the key is a String.
typeMap.put((String) entry.getKey(), getJavaObj(entry.getValue(), hcatFS.getMapValueSchema().get(0)));
}
return typeMap;
case STRING:
case INT:
case BIGINT:
case FLOAT:
case DOUBLE:
return pigObj;
case SMALLINT:
if ((Integer) pigObj < Short.MIN_VALUE || (Integer) pigObj > Short.MAX_VALUE) {
handleOutOfRangeValue(pigObj, hcatFS);
return null;
}
return ((Integer) pigObj).shortValue();
case TINYINT:
if ((Integer) pigObj < Byte.MIN_VALUE || (Integer) pigObj > Byte.MAX_VALUE) {
handleOutOfRangeValue(pigObj, hcatFS);
return null;
}
return ((Integer) pigObj).byteValue();
case BOOLEAN:
if (pigObj instanceof String) {
if (((String) pigObj).trim().compareTo("0") == 0) {
return Boolean.FALSE;
}
if (((String) pigObj).trim().compareTo("1") == 0) {
return Boolean.TRUE;
}
throw new BackendException("Unexpected type " + type + " for value " + pigObj + " of class " + pigObj.getClass().getName(), PigHCatUtil.PIG_EXCEPTION_CODE);
}
return Boolean.parseBoolean(pigObj.toString());
case DECIMAL:
BigDecimal bd = (BigDecimal) pigObj;
DecimalTypeInfo dti = (DecimalTypeInfo) hcatFS.getTypeInfo();
if (bd.precision() > dti.precision() || bd.scale() > dti.scale()) {
handleOutOfRangeValue(pigObj, hcatFS);
return null;
}
return HiveDecimal.create(bd);
case CHAR:
String charVal = (String) pigObj;
CharTypeInfo cti = (CharTypeInfo) hcatFS.getTypeInfo();
if (charVal.length() > cti.getLength()) {
handleOutOfRangeValue(pigObj, hcatFS);
return null;
}
return new HiveChar(charVal, cti.getLength());
case VARCHAR:
String varcharVal = (String) pigObj;
VarcharTypeInfo vti = (VarcharTypeInfo) hcatFS.getTypeInfo();
if (varcharVal.length() > vti.getLength()) {
handleOutOfRangeValue(pigObj, hcatFS);
return null;
}
return new HiveVarchar(varcharVal, vti.getLength());
case TIMESTAMP:
DateTime dt = (DateTime) pigObj;
// getMillis() returns UTC time regardless of TZ
return new Timestamp(dt.getMillis());
case DATE:
/**
* We ignore any TZ setting on Pig value since java.sql.Date doesn't have it (in any
* meaningful way). So the assumption is that if Pig value has 0 time component (midnight)
* we assume it reasonably 'fits' into a Hive DATE. If time part is not 0, it's considered
* out of range for target type.
*/
DateTime dateTime = ((DateTime) pigObj);
if (dateTime.getMillisOfDay() != 0) {
handleOutOfRangeValue(pigObj, hcatFS, "Time component must be 0 (midnight) in local timezone; Local TZ val='" + pigObj + "'");
return null;
}
/* java.sql.Date is a poorly defined API. Some (all?) SerDes call toString() on it
[e.g. LazySimpleSerDe uses LazyUtils.writePrimitiveUTF8()], which automatically adjusts
for the local timezone. Date.valueOf() also uses the local timezone (as does Date(int,int,int)).
Also see PigHCatUtil#extractPigObject() for the corresponding read op. This way a DATETIME from Pig,
when stored into Hive and read back, comes back with the same value. */
return new Date(dateTime.getYear() - 1900, dateTime.getMonthOfYear() - 1, dateTime.getDayOfMonth());
default:
throw new BackendException("Unexpected HCat type " + type + " for value " + pigObj + " of class " + pigObj.getClass().getName(), PigHCatUtil.PIG_EXCEPTION_CODE);
}
} catch (BackendException e) {
// provide the path to the field in the error message
throw new BackendException((hcatFS.getName() == null ? " " : hcatFS.getName() + ".") + e.getMessage(), e);
}
}
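The DECIMAL branch above rejects any BigDecimal whose precision or scale exceeds the target column's DecimalTypeInfo before converting it with HiveDecimal.create. Below is a minimal, self-contained sketch of that check; the helper name and the main method are illustrative, not part of the Hive source.

import java.math.BigDecimal;

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;

public class DecimalFitSketch {
  /** Returns the HiveDecimal for bd, or null when bd does not fit the target decimal(p, s). */
  static HiveDecimal toHiveDecimalOrNull(BigDecimal bd, DecimalTypeInfo target) {
    if (bd.precision() > target.precision() || bd.scale() > target.scale()) {
      // The storer above would call handleOutOfRangeValue() here.
      return null;
    }
    return HiveDecimal.create(bd);
  }

  public static void main(String[] args) {
    DecimalTypeInfo target = new DecimalTypeInfo(5, 2);                           // decimal(5,2)
    System.out.println(toHiveDecimalOrNull(new BigDecimal("123.45"), target));    // 123.45
    System.out.println(toHiveDecimalOrNull(new BigDecimal("1234.567"), target));  // null (scale 3 > 2)
  }
}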
Use of org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo in project hive by apache.
The class VectorizationContext, method checkExprNodeDescForDecimal64.
private boolean checkExprNodeDescForDecimal64(ExprNodeDesc exprNodeDesc) throws HiveException {
if (exprNodeDesc instanceof ExprNodeColumnDesc) {
int colIndex = getInputColumnIndex((ExprNodeColumnDesc) exprNodeDesc);
DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex);
return (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64);
} else if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) {
// Is the result Decimal64 precision?
TypeInfo returnType = exprNodeDesc.getTypeInfo();
if (!checkTypeInfoForDecimal64(returnType)) {
return false;
}
DecimalTypeInfo returnDecimalType = (DecimalTypeInfo) returnType;
GenericUDF udf = ((ExprNodeGenericFuncDesc) exprNodeDesc).getGenericUDF();
Class<?> udfClass = udf.getClass();
// We have a class-level annotation that says whether the UDF's vectorization expressions
// support Decimal64.
VectorizedExpressionsSupportDecimal64 annotation = AnnotationUtils.getAnnotation(udfClass, VectorizedExpressionsSupportDecimal64.class);
if (annotation == null) {
return false;
}
// Carefully check the children to make sure they are Decimal64.
List<ExprNodeDesc> children = exprNodeDesc.getChildren();
for (ExprNodeDesc childExprNodeDesc : children) {
if (childExprNodeDesc instanceof ExprNodeConstantDesc) {
DecimalTypeInfo childDecimalTypeInfo = decimalTypeFromCastToDecimal(childExprNodeDesc, returnDecimalType);
if (childDecimalTypeInfo == null) {
return false;
}
if (!checkTypeInfoForDecimal64(childDecimalTypeInfo)) {
return false;
}
continue;
}
// Otherwise, recurse.
if (!checkExprNodeDescForDecimal64(childExprNodeDesc)) {
return false;
}
}
return true;
} else if (exprNodeDesc instanceof ExprNodeConstantDesc) {
return checkTypeInfoForDecimal64(exprNodeDesc.getTypeInfo());
}
return false;
}
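checkTypeInfoForDecimal64, called above but not shown, decides whether a decimal type can use the compact Decimal64 representation, i.e. whether its unscaled value always fits in a signed 64-bit long. A hedged approximation of that check follows; the 18-digit limit is stated here as an assumption about what a long can hold, not quoted from the Hive source.

import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

public class Decimal64CheckSketch {
  // Any 18-digit unscaled value fits in a signed long; 19 digits may overflow.
  private static final int MAX_DECIMAL64_PRECISION = 18;

  static boolean fitsDecimal64(TypeInfo typeInfo) {
    return typeInfo instanceof DecimalTypeInfo
        && ((DecimalTypeInfo) typeInfo).precision() <= MAX_DECIMAL64_PRECISION;
  }

  public static void main(String[] args) {
    System.out.println(fitsDecimal64(new DecimalTypeInfo(18, 2)));  // true
    System.out.println(fitsDecimal64(new DecimalTypeInfo(20, 2)));  // false
  }
}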
Use of org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo in project hive by apache.
The class VectorizationContext, method decimalTypeFromCastToDecimal.
/*
* This method must return the decimal TypeInfo for what getCastToDecimal will produce.
*/
private DecimalTypeInfo decimalTypeFromCastToDecimal(ExprNodeDesc exprNodeDesc, DecimalTypeInfo returnDecimalType) throws HiveException {
if (exprNodeDesc instanceof ExprNodeConstantDesc) {
// Return a constant vector expression
Object constantValue = ((ExprNodeConstantDesc) exprNodeDesc).getValue();
HiveDecimal decimalValue = castConstantToDecimal(constantValue, exprNodeDesc.getTypeInfo());
if (decimalValue == null) {
// Return something.
return returnDecimalType;
}
return new DecimalTypeInfo(decimalValue.precision(), decimalValue.scale());
}
String inputType = exprNodeDesc.getTypeString();
if (isIntFamily(inputType) || isFloatFamily(inputType) || decimalTypePattern.matcher(inputType).matches() || isStringFamily(inputType) || inputType.equals("timestamp")) {
return returnDecimalType;
}
return null;
}
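For a constant, the method above derives the result type from the constant's own precision and scale rather than from the requested return type. A small sketch of that derivation, using HiveDecimal.create in place of castConstantToDecimal (which is private to VectorizationContext):

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;

public class ConstantDecimalTypeSketch {
  public static void main(String[] args) {
    HiveDecimal constant = HiveDecimal.create("123.4500");
    DecimalTypeInfo inferred = new DecimalTypeInfo(constant.precision(), constant.scale());
    // HiveDecimal trims trailing zeros, so this should print decimal(5,2).
    System.out.println(inferred.getTypeName());
  }
}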
Use of org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo in project hive by apache.
The class VectorizationContext, method getImplicitCastExpression.
/**
* The GenericUDFs might need their children output to be cast to the given castType.
* This method returns a cast expression that would achieve the required casting.
*/
private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc child, TypeInfo castType) throws HiveException {
TypeInfo inputTypeInfo = child.getTypeInfo();
String inputTypeString = inputTypeInfo.getTypeName();
String castTypeString = castType.getTypeName();
if (inputTypeString.equals(castTypeString)) {
// Nothing to be done
return null;
}
boolean inputTypeDecimal = false;
boolean castTypeDecimal = false;
if (decimalTypePattern.matcher(inputTypeString).matches()) {
inputTypeDecimal = true;
}
if (decimalTypePattern.matcher(castTypeString).matches()) {
castTypeDecimal = true;
}
if (castTypeDecimal && !inputTypeDecimal) {
if (needsImplicitCastForDecimal(udf)) {
// Cast the input to decimal
// If castType is decimal, try not to lose precision for numeric types.
castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
GenericUDFToDecimal castToDecimalUDF = new GenericUDFToDecimal();
castToDecimalUDF.setTypeInfo(castType);
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(child);
ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, castToDecimalUDF, children);
return desc;
}
} else if (!castTypeDecimal && inputTypeDecimal) {
if (needsImplicitCastForDecimal(udf)) {
// Cast decimal input to returnType
GenericUDF genericUdf = getGenericUDFForCast(castType);
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(child);
ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children);
return desc;
}
} else {
// Casts to exact types including long to double etc. are needed in some special cases.
if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFNvl || udf instanceof GenericUDFElt) {
GenericUDF genericUdf = getGenericUDFForCast(castType);
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(child);
ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children);
return desc;
}
}
return null;
}
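When the cast target is decimal and the child is not, the branch above wraps the child in a GenericUDFToDecimal node whose type has been widened so no digits are lost. The sketch below builds such a node by hand for an int column; the column and table names are made up, and decimal(10,0) is chosen because any 32-bit int fits in 10 digits (the job updatePrecision does in the snippet above).

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ImplicitDecimalCastSketch {
  public static void main(String[] args) throws UDFArgumentException {
    DecimalTypeInfo castType = new DecimalTypeInfo(10, 0);  // wide enough for any int
    ExprNodeDesc intColumn =
        new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "qty", "t", false);

    GenericUDFToDecimal toDecimal = new GenericUDFToDecimal();
    toDecimal.setTypeInfo(castType);

    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(intColumn);
    ExprNodeDesc castExpr = new ExprNodeGenericFuncDesc(castType, toDecimal, children);
    // Exact display string depends on the Hive version; it represents CAST(qty AS decimal(10,0)).
    System.out.println(castExpr.getExprString());
  }
}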
Use of org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo in project hive by apache.
The class VerifyFastRow, method doVerifyDeserializeRead.
public static void doVerifyDeserializeRead(DeserializeRead deserializeRead, TypeInfo typeInfo, Object object, boolean isNull) throws IOException {
if (isNull) {
if (object != null) {
TestCase.fail("Field reports null but object is not null (class " + object.getClass().getName() + ", " + object.toString() + ")");
}
return;
} else if (object == null) {
TestCase.fail("Field report not null but object is null");
}
switch(typeInfo.getCategory()) {
case PRIMITIVE:
{
PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
switch(primitiveTypeInfo.getPrimitiveCategory()) {
case BOOLEAN:
{
boolean value = deserializeRead.currentBoolean;
if (!(object instanceof BooleanWritable)) {
TestCase.fail("Boolean expected writable not Boolean");
}
boolean expected = ((BooleanWritable) object).get();
if (value != expected) {
TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
}
}
break;
case BYTE:
{
byte value = deserializeRead.currentByte;
if (!(object instanceof ByteWritable)) {
TestCase.fail("Byte expected writable not Byte");
}
byte expected = ((ByteWritable) object).get();
if (value != expected) {
TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
}
}
break;
case SHORT:
{
short value = deserializeRead.currentShort;
if (!(object instanceof ShortWritable)) {
TestCase.fail("Short expected writable not Short");
}
short expected = ((ShortWritable) object).get();
if (value != expected) {
TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
}
}
break;
case INT:
{
int value = deserializeRead.currentInt;
if (!(object instanceof IntWritable)) {
TestCase.fail("Integer expected writable not Integer");
}
int expected = ((IntWritable) object).get();
if (value != expected) {
TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
}
}
break;
case LONG:
{
long value = deserializeRead.currentLong;
if (!(object instanceof LongWritable)) {
TestCase.fail("Long expected writable not Long");
}
long expected = ((LongWritable) object).get();
if (value != expected) {
TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
}
}
break;
case FLOAT:
{
float value = deserializeRead.currentFloat;
if (!(object instanceof FloatWritable)) {
TestCase.fail("Float expected writable not Float");
}
float expected = ((FloatWritable) object).get();
if (value != expected) {
TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
}
}
break;
case DOUBLE:
{
double value = deserializeRead.currentDouble;
if (!(object instanceof DoubleWritable)) {
TestCase.fail("Double expected writable not Double");
}
double expected = ((DoubleWritable) object).get();
if (value != expected) {
TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
}
}
break;
case STRING:
{
byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
Text text = new Text(stringBytes);
String string = text.toString();
String expected = ((Text) object).toString();
if (!string.equals(expected)) {
TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
}
}
break;
case CHAR:
{
byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
Text text = new Text(stringBytes);
String string = text.toString();
HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
HiveChar expected = ((HiveCharWritable) object).getHiveChar();
if (!hiveChar.equals(expected)) {
TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
}
}
break;
case VARCHAR:
{
byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
Text text = new Text(stringBytes);
String string = text.toString();
HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
HiveVarchar expected = ((HiveVarcharWritable) object).getHiveVarchar();
if (!hiveVarchar.equals(expected)) {
TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
}
}
break;
case DECIMAL:
{
HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
if (value == null) {
TestCase.fail("Decimal field evaluated to NULL");
}
HiveDecimal expected = ((HiveDecimalWritable) object).getHiveDecimal();
if (!value.equals(expected)) {
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
int precision = decimalTypeInfo.getPrecision();
int scale = decimalTypeInfo.getScale();
TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
}
}
break;
case DATE:
{
Date value = deserializeRead.currentDateWritable.get();
Date expected = ((DateWritable) object).get();
if (!value.equals(expected)) {
TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
}
}
break;
case TIMESTAMP:
{
Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
Timestamp expected = ((TimestampWritable) object).getTimestamp();
if (!value.equals(expected)) {
TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
}
}
break;
case INTERVAL_YEAR_MONTH:
{
HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth();
if (!value.equals(expected)) {
TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
}
}
break;
case INTERVAL_DAY_TIME:
{
HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime();
if (!value.equals(expected)) {
TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
}
}
break;
case BINARY:
{
byte[] byteArray = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
BytesWritable bytesWritable = (BytesWritable) object;
byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
if (byteArray.length != expected.length) {
TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
}
for (int b = 0; b < byteArray.length; b++) {
if (byteArray[b] != expected[b]) {
TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
}
}
}
break;
default:
throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
}
}
break;
case LIST:
case MAP:
case STRUCT:
case UNION:
throw new Error("Complex types need to be handled separately");
default:
throw new Error("Unknown category " + typeInfo.getCategory());
}
}
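The DECIMAL case above compares the deserialized HiveDecimal with the expected HiveDecimalWritable and, on mismatch, reports the precision and scale from the DecimalTypeInfo to make the failure easier to diagnose. A small standalone sketch of that comparison (a plain AssertionError stands in for TestCase.fail):

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;

public class DecimalVerifySketch {
  static void verifyDecimal(HiveDecimal actual, HiveDecimalWritable expectedWritable,
      DecimalTypeInfo typeInfo) {
    HiveDecimal expected = expectedWritable.getHiveDecimal();
    if (!actual.equals(expected)) {
      throw new AssertionError("Decimal field mismatch (expected " + expected
          + " found " + actual + ") precision " + typeInfo.getPrecision()
          + ", scale " + typeInfo.getScale());
    }
  }

  public static void main(String[] args) {
    DecimalTypeInfo typeInfo = new DecimalTypeInfo(10, 2);
    HiveDecimalWritable expected = new HiveDecimalWritable(HiveDecimal.create("42.50"));
    verifyDecimal(HiveDecimal.create("42.50"), expected, typeInfo);  // passes silently
  }
}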