use of org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo in project hive by apache.
the class StatsUtils method getAvgColLenOf.
/**
 * Get the average column length of variable length data types.
 *
 * <p>For a constant projection the length of the constant itself is returned (0 when the
 * constant is null). For non-constant string and binary columns the value configured under
 * {@code HIVE_STATS_MAX_VARIABLE_LENGTH} is returned; for non-constant varchar and char
 * columns the length declared in the type info is returned. Every other type name is
 * delegated to {@code getSizeOfComplexTypes}.
 *
 * @param conf
 *          - hive conf
 * @param oi
 *          - object inspector
 * @param colType
 *          - column type name (compared case-insensitively)
 * @return average column length
 * @throws IllegalArgumentException
 *           if the column type is a string, varchar, char or binary type but the object
 *           inspector is not one of the inspectors handled for that type
 */
public static long getAvgColLenOf(HiveConf conf, ObjectInspector oi, String colType) {
  long configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH);
  // Lower-case with a fixed locale: under the default locale an upper-case type name such
  // as "STRING" would not match in locales with special casing rules (e.g. Turkish).
  String colTypeLowCase = colType.toLowerCase(java.util.Locale.ROOT);
  if (colTypeLowCase.equals(serdeConstants.STRING_TYPE_NAME)) {
    // constant string projection Ex: select "hello" from table
    if (oi instanceof ConstantObjectInspector) {
      ConstantObjectInspector coi = (ConstantObjectInspector) oi;
      // if writable constant is null then return size 0
      Object constantValue = coi.getWritableConstantValue();
      return constantValue == null ? 0 : constantValue.toString().length();
    } else if (oi instanceof StringObjectInspector) {
      // return the variable length from config
      return configVarLen;
    }
  } else if (colTypeLowCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
    // constant varchar projection
    if (oi instanceof ConstantObjectInspector) {
      ConstantObjectInspector coi = (ConstantObjectInspector) oi;
      // if writable constant is null then return size 0
      Object constantValue = coi.getWritableConstantValue();
      return constantValue == null ? 0 : constantValue.toString().length();
    } else if (oi instanceof HiveVarcharObjectInspector) {
      // use the length declared on the varchar type
      VarcharTypeInfo type = (VarcharTypeInfo) ((HiveVarcharObjectInspector) oi).getTypeInfo();
      return type.getLength();
    }
  } else if (colTypeLowCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
    // constant char projection
    if (oi instanceof ConstantObjectInspector) {
      ConstantObjectInspector coi = (ConstantObjectInspector) oi;
      // if writable constant is null then return size 0
      Object constantValue = coi.getWritableConstantValue();
      return constantValue == null ? 0 : constantValue.toString().length();
    } else if (oi instanceof HiveCharObjectInspector) {
      // use the length declared on the char type
      CharTypeInfo type = (CharTypeInfo) ((HiveCharObjectInspector) oi).getTypeInfo();
      return type.getLength();
    }
  } else if (colTypeLowCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
    // constant byte arrays
    if (oi instanceof ConstantObjectInspector) {
      ConstantObjectInspector coi = (ConstantObjectInspector) oi;
      // if writable constant is null then return size 0
      BytesWritable constantValue = (BytesWritable) coi.getWritableConstantValue();
      return constantValue == null ? 0 : constantValue.getLength();
    } else if (oi instanceof BinaryObjectInspector) {
      // return the variable length from config
      return configVarLen;
    }
  } else {
    // complex types (map, list, struct, union)
    return getSizeOfComplexTypes(conf, oi);
  }
  // Reached only when the type name was recognised but the inspector was not. Guard the
  // inspector so that a null one is reported through the intended exception, not an NPE.
  throw new IllegalArgumentException("Size requested for unknown type: " + colType + " OI: "
      + (oi == null ? null : oi.getTypeName()));
}
use of org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo in project hive by apache.
the class TestVectorizationContext method testIfConditionalExprs.
/**
 * Test that correct VectorExpression classes are chosen for the
 * IF (expr1, expr2, expr3) conditional expression for integer, float,
 * boolean, timestamp, string, char and varchar input types. expr1 is always an input
 * column expression of type long. expr2 and expr3 can be column expressions or constants
 * of other types but must have the same type.
 *
 * <p>The same {@code children1} list is mutated step by step, so each section depends on
 * the state left behind by the previous one; the statement order matters.
 */
@Test
public void testIfConditionalExprs() throws HiveException {
  ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false);
  ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Long.class, "col2", "table", false);
  ExprNodeColumnDesc col3Expr = new ExprNodeColumnDesc(Long.class, "col3", "table", false);
  // Integer.valueOf replaces the deprecated-for-removal Integer(int) constructor.
  ExprNodeConstantDesc constDesc2 = new ExprNodeConstantDesc(Integer.valueOf(1));
  ExprNodeConstantDesc constDesc3 = new ExprNodeConstantDesc(Integer.valueOf(2));

  // long column/column IF
  GenericUDFIf udf = new GenericUDFIf();
  List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>();
  children1.add(col1Expr);
  children1.add(col2Expr);
  children1.add(col3Expr);
  ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1);
  List<String> columns = new ArrayList<String>();
  columns.add("col0");
  columns.add("col1");
  columns.add("col2");
  columns.add("col3");
  VectorizationContext vc = new VectorizationContext("name", columns);
  VectorExpression ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprLongColumnLongColumn);

  // long column/scalar IF
  children1.set(2, new ExprNodeConstantDesc(1L));
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprLongColumnLongScalar);

  // long scalar/scalar IF
  children1.set(1, new ExprNodeConstantDesc(1L));
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprLongScalarLongScalar);

  // long scalar/column IF
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprLongScalarLongColumn);

  // test for double type
  col2Expr = new ExprNodeColumnDesc(Double.class, "col2", "table", false);
  col3Expr = new ExprNodeColumnDesc(Double.class, "col3", "table", false);

  // double column/column IF
  children1.set(1, col2Expr);
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprDoubleColumnDoubleColumn);

  // double column/scalar IF
  children1.set(2, new ExprNodeConstantDesc(1D));
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprDoubleColumnDoubleScalar);

  // double scalar/scalar IF
  children1.set(1, new ExprNodeConstantDesc(1D));
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprDoubleScalarDoubleScalar);

  // double scalar/column IF
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprDoubleScalarDoubleColumn);

  // double scalar/long column IF
  children1.set(2, new ExprNodeColumnDesc(Long.class, "col3", "table", false));
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprDoubleScalarLongColumn);

  // Additional combinations of (long,double)X(column,scalar) for each of the second
  // and third arguments are omitted. We have coverage of all the source templates
  // already.

  // test for timestamp type
  col2Expr = new ExprNodeColumnDesc(Timestamp.class, "col2", "table", false);
  col3Expr = new ExprNodeColumnDesc(Timestamp.class, "col3", "table", false);

  // timestamp column/column IF
  children1.set(1, col2Expr);
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprTimestampColumnColumn);

  // timestamp column/scalar IF where scalar is really a CAST of a constant to timestamp.
  ExprNodeGenericFuncDesc f = new ExprNodeGenericFuncDesc();
  f.setGenericUDF(new GenericUDFTimestamp());
  f.setTypeInfo(TypeInfoFactory.timestampTypeInfo);
  List<ExprNodeDesc> children2 = new ArrayList<ExprNodeDesc>();
  f.setChildren(children2);
  children2.add(new ExprNodeConstantDesc("2013-11-05 00:00:00.000"));
  children1.set(2, f);
  ve = vc.getVectorExpression(exprDesc);

  // We check for two different classes below because initially the result
  // is IfExprTimestampColumnColumn but in the future if the system is enhanced
  // with constant folding then the result will be IfExprTimestampColumnScalar.
  assertTrue(IfExprTimestampColumnColumn.class == ve.getClass() || IfExprTimestampColumnScalar.class == ve.getClass());

  // timestamp scalar/scalar
  children1.set(1, f);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(IfExprTimestampColumnColumn.class == ve.getClass() || IfExprTimestampScalarScalar.class == ve.getClass());

  // timestamp scalar/column
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(IfExprTimestampColumnColumn.class == ve.getClass() || IfExprTimestampScalarColumn.class == ve.getClass());

  // test for boolean type
  col2Expr = new ExprNodeColumnDesc(Boolean.class, "col2", "table", false);
  col3Expr = new ExprNodeColumnDesc(Boolean.class, "col3", "table", false);

  // column/column
  children1.set(1, col2Expr);
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprLongColumnLongColumn);

  // column/scalar IF
  children1.set(2, new ExprNodeConstantDesc(true));
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprLongColumnLongScalar);

  // scalar/scalar IF
  children1.set(1, new ExprNodeConstantDesc(true));
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprLongScalarLongScalar);

  // scalar/column IF
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprLongScalarLongColumn);

  // test for string type
  constDesc2 = new ExprNodeConstantDesc("Alpha");
  constDesc3 = new ExprNodeConstantDesc("Bravo");
  col2Expr = new ExprNodeColumnDesc(String.class, "col2", "table", false);
  col3Expr = new ExprNodeColumnDesc(String.class, "col3", "table", false);

  // column/column
  children1.set(1, col2Expr);
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprStringGroupColumnStringGroupColumn);

  // column/scalar
  children1.set(2, constDesc3);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprStringGroupColumnStringScalar);

  // scalar/scalar
  children1.set(1, constDesc2);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprStringScalarStringScalar);

  // scalar/column
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprStringScalarStringGroupColumn);

  // test for CHAR type
  CharTypeInfo charTypeInfo = new CharTypeInfo(10);
  constDesc2 = new ExprNodeConstantDesc(charTypeInfo, new HiveChar("Alpha", 10));
  constDesc3 = new ExprNodeConstantDesc(charTypeInfo, new HiveChar("Bravo", 10));
  col2Expr = new ExprNodeColumnDesc(charTypeInfo, "col2", "table", false);
  col3Expr = new ExprNodeColumnDesc(charTypeInfo, "col3", "table", false);

  // column/column
  children1.set(1, col2Expr);
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprStringGroupColumnStringGroupColumn);

  // column/scalar
  children1.set(2, constDesc3);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprStringGroupColumnCharScalar);

  // scalar/scalar (the check itself is disabled; the set() is still needed because the
  // scalar/column case below relies on child 1 being the constant)
  children1.set(1, constDesc2);
  // ve = vc.getVectorExpression(exprDesc);
  // assertTrue(ve instanceof IfExprCharScalarCharScalar);

  // scalar/column
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprCharScalarStringGroupColumn);

  // test for VARCHAR type
  VarcharTypeInfo varcharTypeInfo = new VarcharTypeInfo(10);
  constDesc2 = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Alpha", 10));
  constDesc3 = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Bravo", 10));
  col2Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col2", "table", false);
  col3Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col3", "table", false);

  // column/column
  children1.set(1, col2Expr);
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprStringGroupColumnStringGroupColumn);

  // column/scalar
  children1.set(2, constDesc3);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprStringGroupColumnVarCharScalar);

  // scalar/scalar (the check itself is disabled; the set() is still needed because the
  // scalar/column case below relies on child 1 being the constant)
  children1.set(1, constDesc2);
  // ve = vc.getVectorExpression(exprDesc);
  // assertTrue(ve instanceof IfExprVarCharScalarVarCharScalar);

  // scalar/column
  children1.set(2, col3Expr);
  ve = vc.getVectorExpression(exprDesc);
  assertTrue(ve instanceof IfExprVarCharScalarStringGroupColumn);
}
use of org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo in project hive by apache.
the class VectorVerifyFast method doVerifyDeserializeRead.
/**
 * Verifies that the primitive value currently exposed by {@code deserializeRead} matches
 * the expected writable {@code object}, failing the test through {@code TestCase.fail}
 * on any mismatch.
 *
 * <p>When {@code isNull} is true the only check is that {@code object} is null as well.
 * Otherwise the value is read from the {@code current*} field of {@code deserializeRead}
 * that corresponds to the primitive category of {@code typeInfo} and compared with the
 * value unwrapped from {@code object}.
 *
 * @param deserializeRead reader whose {@code current*} fields hold the field just read
 * @param typeInfo type of the field; only the PRIMITIVE category is handled here
 * @param object expected value as a writable, or null when the field is expected to be null
 * @param isNull whether the reader reported the field as null
 * @throws IOException declared for callers; NOTE(review): no statement in this method
 *           visibly throws it
 * @throws Error if {@code typeInfo} is a complex (list, map, struct, union) or unknown
 *           category, or an unknown primitive category
 */
public static void doVerifyDeserializeRead(DeserializeRead deserializeRead, TypeInfo typeInfo, Object object, boolean isNull) throws IOException {
  if (isNull) {
    if (object != null) {
      TestCase.fail("Field reports null but object is not null (class " + object.getClass().getName() + ", " + object.toString() + ")");
    }
    return;
  } else if (object == null) {
    TestCase.fail("Field report not null but object is null");
  }
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      {
        PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
        switch (primitiveTypeInfo.getPrimitiveCategory()) {
          case BOOLEAN:
            {
              boolean value = deserializeRead.currentBoolean;
              if (!(object instanceof BooleanWritable)) {
                TestCase.fail("Boolean expected writable not Boolean");
              }
              boolean expected = ((BooleanWritable) object).get();
              if (value != expected) {
                TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
              }
            }
            break;
          case BYTE:
            {
              byte value = deserializeRead.currentByte;
              if (!(object instanceof ByteWritable)) {
                TestCase.fail("Byte expected writable not Byte");
              }
              byte expected = ((ByteWritable) object).get();
              if (value != expected) {
                TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
              }
            }
            break;
          case SHORT:
            {
              short value = deserializeRead.currentShort;
              if (!(object instanceof ShortWritable)) {
                TestCase.fail("Short expected writable not Short");
              }
              short expected = ((ShortWritable) object).get();
              if (value != expected) {
                TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
              }
            }
            break;
          case INT:
            {
              int value = deserializeRead.currentInt;
              if (!(object instanceof IntWritable)) {
                TestCase.fail("Integer expected writable not Integer");
              }
              int expected = ((IntWritable) object).get();
              if (value != expected) {
                TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
              }
            }
            break;
          case LONG:
            {
              long value = deserializeRead.currentLong;
              if (!(object instanceof LongWritable)) {
                TestCase.fail("Long expected writable not Long");
              }
              // primitive long: avoids a needless box/unbox round trip in the comparison
              long expected = ((LongWritable) object).get();
              if (value != expected) {
                TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
              }
            }
            break;
          case FLOAT:
            {
              float value = deserializeRead.currentFloat;
              if (!(object instanceof FloatWritable)) {
                TestCase.fail("Float expected writable not Float");
              }
              float expected = ((FloatWritable) object).get();
              if (value != expected) {
                TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
              }
            }
            break;
          case DOUBLE:
            {
              double value = deserializeRead.currentDouble;
              if (!(object instanceof DoubleWritable)) {
                TestCase.fail("Double expected writable not Double");
              }
              double expected = ((DoubleWritable) object).get();
              if (value != expected) {
                TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
              }
            }
            break;
          case STRING:
            {
              // copy out exactly the byte range the reader points at
              byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
              Text text = new Text(stringBytes);
              String string = text.toString();
              String expected = ((Text) object).toString();
              if (!string.equals(expected)) {
                TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
              }
            }
            break;
          case CHAR:
            {
              byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
              Text text = new Text(stringBytes);
              String string = text.toString();
              // rebuild the value with the declared char length before comparing
              HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
              HiveChar expected = ((HiveCharWritable) object).getHiveChar();
              if (!hiveChar.equals(expected)) {
                TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
              }
            }
            break;
          case VARCHAR:
            {
              byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
              Text text = new Text(stringBytes);
              String string = text.toString();
              // rebuild the value with the declared varchar length before comparing
              HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
              HiveVarchar expected = ((HiveVarcharWritable) object).getHiveVarchar();
              if (!hiveVarchar.equals(expected)) {
                TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
              }
            }
            break;
          case DECIMAL:
            {
              HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
              if (value == null) {
                TestCase.fail("Decimal field evaluated to NULL");
              }
              HiveDecimal expected = ((HiveDecimalWritable) object).getHiveDecimal();
              if (!value.equals(expected)) {
                DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
                int precision = decimalTypeInfo.getPrecision();
                int scale = decimalTypeInfo.getScale();
                TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
              }
            }
            break;
          case DATE:
            {
              Date value = deserializeRead.currentDateWritable.get();
              Date expected = ((DateWritable) object).get();
              if (!value.equals(expected)) {
                TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
              }
            }
            break;
          case TIMESTAMP:
            {
              Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
              Timestamp expected = ((TimestampWritable) object).getTimestamp();
              if (!value.equals(expected)) {
                TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
              }
            }
            break;
          case INTERVAL_YEAR_MONTH:
            {
              HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
              HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth();
              if (!value.equals(expected)) {
                TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
              }
            }
            break;
          case INTERVAL_DAY_TIME:
            {
              HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
              HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime();
              if (!value.equals(expected)) {
                TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
              }
            }
            break;
          case BINARY:
            {
              byte[] byteArray = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
              BytesWritable bytesWritable = (BytesWritable) object;
              // only the first getLength() bytes of the writable's backing array are used
              byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
              // Arrays.equals covers both the length check and the element-wise comparison
              if (!Arrays.equals(byteArray, expected)) {
                TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
              }
            }
            break;
          default:
            throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
        }
      }
      break;
    case LIST:
    case MAP:
    case STRUCT:
    case UNION:
      throw new Error("Complex types need to be handled separately");
    default:
      throw new Error("Unknown category " + typeInfo.getCategory());
  }
}
use of org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo in project hive by apache.
the class TypeInfoFactory method createPrimitiveTypeInfo.
/**
 * Builds the parameterized PrimitiveTypeInfo (such as a VarcharTypeInfo) described by the
 * given full type name.
 *
 * @param fullName full name of the type, including any type parameters
 * @return the parameterized type info, or {@code null} when the name carries no type
 *         parameters, carries an unexpected number of them for its category, or belongs
 *         to a category not handled here
 * @throws RuntimeException if no primitive type entry exists for the base type name
 */
private static PrimitiveTypeInfo createPrimitiveTypeInfo(String fullName) {
  final String baseTypeName = TypeInfoUtils.getBaseName(fullName);
  final PrimitiveTypeEntry entry = PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(baseTypeName);
  if (entry == null) {
    throw new RuntimeException("Unknown type " + fullName);
  }

  final TypeInfoUtils.PrimitiveParts primitiveParts = TypeInfoUtils.parsePrimitiveParts(fullName);
  final int paramCount = primitiveParts.typeParams == null ? 0 : primitiveParts.typeParams.length;
  if (paramCount < 1) {
    // an unparameterized name is not turned into a type info here
    return null;
  }

  // Stays null unless the category is parameterized and got the expected parameter count.
  PrimitiveTypeInfo created = null;
  switch (entry.primitiveCategory) {
    case CHAR:
      // char(length)
      if (paramCount == 1) {
        created = new CharTypeInfo(Integer.valueOf(primitiveParts.typeParams[0]));
      }
      break;
    case VARCHAR:
      // varchar(length)
      if (paramCount == 1) {
        created = new VarcharTypeInfo(Integer.valueOf(primitiveParts.typeParams[0]));
      }
      break;
    case DECIMAL:
      // decimal(precision, scale)
      if (paramCount == 2) {
        created = new DecimalTypeInfo(Integer.valueOf(primitiveParts.typeParams[0]), Integer.valueOf(primitiveParts.typeParams[1]));
      }
      break;
    case TIMESTAMPLOCALTZ:
      // single parameter passed through as-is
      if (paramCount == 1) {
        created = new TimestampLocalTZTypeInfo(primitiveParts.typeParams[0]);
      }
      break;
    default:
      break;
  }
  return created;
}
Aggregations