use of org.apache.hadoop.hive.serde2.proto.test.Complexpb.Complex in project hive by apache.
the class MetadataTypedColumnsetSerDe method serialize.
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
if (objInspector.getCategory() != Category.STRUCT) {
throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
}
StructObjectInspector soi = (StructObjectInspector) objInspector;
List<? extends StructField> fields = soi.getAllStructFieldRefs();
StringBuilder sb = new StringBuilder();
for (int i = 0; i < fields.size(); i++) {
if (i > 0) {
sb.append(separator);
}
Object column = soi.getStructFieldData(obj, fields.get(i));
if (fields.get(i).getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
// For primitive object, serialize to plain string
sb.append(column == null ? nullString : column.toString());
} else {
// For complex object, serialize to JSON format
sb.append(SerDeUtils.getJSONString(column, fields.get(i).getFieldObjectInspector()));
}
}
serializeCache.set(sb.toString());
return serializeCache;
}
use of org.apache.hadoop.hive.serde2.proto.test.Complexpb.Complex in project hive by apache.
the class StatsUtils method getAvgColLenOf.
/**
* Get the raw data size of variable length data types
* @param conf
* - hive conf
* @param oi
* - object inspector
* @param colType
* - column type
* @return raw data size
*/
public static long getAvgColLenOf(HiveConf conf, ObjectInspector oi, String colType) {
long configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH);
String colTypeLowCase = colType.toLowerCase();
if (colTypeLowCase.equals(serdeConstants.STRING_TYPE_NAME)) {
// constant string projection Ex: select "hello" from table
if (oi instanceof ConstantObjectInspector) {
ConstantObjectInspector coi = (ConstantObjectInspector) oi;
// if writable constant is null then return size 0
Object constantValue = coi.getWritableConstantValue();
return constantValue == null ? 0 : constantValue.toString().length();
} else if (oi instanceof StringObjectInspector) {
// return the variable length from config
return configVarLen;
}
} else if (colTypeLowCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
// constant varchar projection
if (oi instanceof ConstantObjectInspector) {
ConstantObjectInspector coi = (ConstantObjectInspector) oi;
// if writable constant is null then return size 0
Object constantValue = coi.getWritableConstantValue();
return constantValue == null ? 0 : constantValue.toString().length();
} else if (oi instanceof HiveVarcharObjectInspector) {
VarcharTypeInfo type = (VarcharTypeInfo) ((HiveVarcharObjectInspector) oi).getTypeInfo();
return type.getLength();
}
} else if (colTypeLowCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
// constant char projection
if (oi instanceof ConstantObjectInspector) {
ConstantObjectInspector coi = (ConstantObjectInspector) oi;
// if writable constant is null then return size 0
Object constantValue = coi.getWritableConstantValue();
return constantValue == null ? 0 : constantValue.toString().length();
} else if (oi instanceof HiveCharObjectInspector) {
CharTypeInfo type = (CharTypeInfo) ((HiveCharObjectInspector) oi).getTypeInfo();
return type.getLength();
}
} else if (colTypeLowCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
// constant byte arrays
if (oi instanceof ConstantObjectInspector) {
ConstantObjectInspector coi = (ConstantObjectInspector) oi;
// if writable constant is null then return size 0
BytesWritable constantValue = (BytesWritable) coi.getWritableConstantValue();
return constantValue == null ? 0 : constantValue.getLength();
} else if (oi instanceof BinaryObjectInspector) {
// return the variable length from config
return configVarLen;
}
} else {
// complex types (map, list, struct, union)
return getSizeOfComplexTypes(conf, oi);
}
throw new IllegalArgumentException("Size requested for unknown type: " + colType + " OI: " + oi.getTypeName());
}
use of org.apache.hadoop.hive.serde2.proto.test.Complexpb.Complex in project hive by apache.
the class StatsUtils method getColStatisticsFromExpression.
/**
* Get column statistics expression nodes
* @param conf
* - hive conf
* @param parentStats
* - parent statistics
* @param end
* - expression nodes
* @return column statistics
*/
public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statistics parentStats, ExprNodeDesc end) {
if (end == null) {
return null;
}
String colName = null;
String colType = null;
double avgColSize = 0;
long countDistincts = 0;
long numNulls = 0;
ObjectInspector oi = end.getWritableObjectInspector();
long numRows = parentStats.getNumRows();
if (end instanceof ExprNodeColumnDesc) {
// column projection
ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
colName = encd.getColumn();
if (encd.getIsPartitionColOrVirtualCol()) {
ColStatistics colStats = parentStats.getColumnStatisticsFromColName(colName);
if (colStats != null) {
/* If statistics for the column already exist use it. */
return colStats.clone();
}
// virtual columns
colType = encd.getTypeInfo().getTypeName();
countDistincts = numRows;
} else {
// clone the column stats and return
ColStatistics result = parentStats.getColumnStatisticsFromColName(colName);
if (result != null) {
return result.clone();
}
return null;
}
} else if (end instanceof ExprNodeConstantDesc) {
// constant projection
ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end;
colName = encd.getName();
colType = encd.getTypeString();
if (encd.getValue() == null) {
// null projection
numNulls = numRows;
} else {
countDistincts = 1;
}
} else if (end instanceof ExprNodeGenericFuncDesc) {
ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end;
colName = engfd.getName();
colType = engfd.getTypeString();
// If it is a widening cast, we do not change NDV, min, max
if (isWideningCast(engfd) && engfd.getChildren().get(0) instanceof ExprNodeColumnDesc) {
// cast on single column
ColStatistics stats = parentStats.getColumnStatisticsFromColName(engfd.getCols().get(0));
if (stats != null) {
ColStatistics newStats;
newStats = stats.clone();
newStats.setColumnName(colName);
colType = colType.toLowerCase();
newStats.setColumnType(colType);
newStats.setAvgColLen(getAvgColLenOf(conf, oi, colType));
return newStats;
}
}
// fallback to default
countDistincts = getNDVFor(engfd, numRows, parentStats);
} else if (end instanceof ExprNodeColumnListDesc) {
// column list
ExprNodeColumnListDesc encd = (ExprNodeColumnListDesc) end;
colName = Joiner.on(",").join(encd.getCols());
colType = serdeConstants.LIST_TYPE_NAME;
countDistincts = numRows;
} else if (end instanceof ExprNodeFieldDesc) {
// field within complex type
ExprNodeFieldDesc enfd = (ExprNodeFieldDesc) end;
colName = enfd.getFieldName();
colType = enfd.getTypeString();
countDistincts = numRows;
} else {
throw new IllegalArgumentException("not supported expr type " + end.getClass());
}
colType = colType.toLowerCase();
avgColSize = getAvgColLenOf(conf, oi, colType);
ColStatistics colStats = new ColStatistics(colName, colType);
colStats.setAvgColLen(avgColSize);
colStats.setCountDistint(countDistincts);
colStats.setNumNulls(numNulls);
return colStats;
}
use of org.apache.hadoop.hive.serde2.proto.test.Complexpb.Complex in project hive by apache.
the class VectorVerifyFast method doVerifyDeserializeRead.
public static void doVerifyDeserializeRead(DeserializeRead deserializeRead, TypeInfo typeInfo, Object object, boolean isNull) throws IOException {
if (isNull) {
if (object != null) {
TestCase.fail("Field reports null but object is not null (class " + object.getClass().getName() + ", " + object.toString() + ")");
}
return;
} else if (object == null) {
TestCase.fail("Field report not null but object is null");
}
switch(typeInfo.getCategory()) {
case PRIMITIVE:
{
PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
switch(primitiveTypeInfo.getPrimitiveCategory()) {
case BOOLEAN:
{
boolean value = deserializeRead.currentBoolean;
if (!(object instanceof BooleanWritable)) {
TestCase.fail("Boolean expected writable not Boolean");
}
boolean expected = ((BooleanWritable) object).get();
if (value != expected) {
TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
}
}
break;
case BYTE:
{
byte value = deserializeRead.currentByte;
if (!(object instanceof ByteWritable)) {
TestCase.fail("Byte expected writable not Byte");
}
byte expected = ((ByteWritable) object).get();
if (value != expected) {
TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
}
}
break;
case SHORT:
{
short value = deserializeRead.currentShort;
if (!(object instanceof ShortWritable)) {
TestCase.fail("Short expected writable not Short");
}
short expected = ((ShortWritable) object).get();
if (value != expected) {
TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
}
}
break;
case INT:
{
int value = deserializeRead.currentInt;
if (!(object instanceof IntWritable)) {
TestCase.fail("Integer expected writable not Integer");
}
int expected = ((IntWritable) object).get();
if (value != expected) {
TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
}
}
break;
case LONG:
{
long value = deserializeRead.currentLong;
if (!(object instanceof LongWritable)) {
TestCase.fail("Long expected writable not Long");
}
Long expected = ((LongWritable) object).get();
if (value != expected) {
TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
}
}
break;
case FLOAT:
{
float value = deserializeRead.currentFloat;
if (!(object instanceof FloatWritable)) {
TestCase.fail("Float expected writable not Float");
}
float expected = ((FloatWritable) object).get();
if (value != expected) {
TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
}
}
break;
case DOUBLE:
{
double value = deserializeRead.currentDouble;
if (!(object instanceof DoubleWritable)) {
TestCase.fail("Double expected writable not Double");
}
double expected = ((DoubleWritable) object).get();
if (value != expected) {
TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
}
}
break;
case STRING:
{
byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
Text text = new Text(stringBytes);
String string = text.toString();
String expected = ((Text) object).toString();
if (!string.equals(expected)) {
TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
}
}
break;
case CHAR:
{
byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
Text text = new Text(stringBytes);
String string = text.toString();
HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
HiveChar expected = ((HiveCharWritable) object).getHiveChar();
if (!hiveChar.equals(expected)) {
TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
}
}
break;
case VARCHAR:
{
byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
Text text = new Text(stringBytes);
String string = text.toString();
HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
HiveVarchar expected = ((HiveVarcharWritable) object).getHiveVarchar();
if (!hiveVarchar.equals(expected)) {
TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
}
}
break;
case DECIMAL:
{
HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
if (value == null) {
TestCase.fail("Decimal field evaluated to NULL");
}
HiveDecimal expected = ((HiveDecimalWritable) object).getHiveDecimal();
if (!value.equals(expected)) {
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
int precision = decimalTypeInfo.getPrecision();
int scale = decimalTypeInfo.getScale();
TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
}
}
break;
case DATE:
{
Date value = deserializeRead.currentDateWritable.get();
Date expected = ((DateWritable) object).get();
if (!value.equals(expected)) {
TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
}
}
break;
case TIMESTAMP:
{
Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
Timestamp expected = ((TimestampWritable) object).getTimestamp();
if (!value.equals(expected)) {
TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
}
}
break;
case INTERVAL_YEAR_MONTH:
{
HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth();
if (!value.equals(expected)) {
TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
}
}
break;
case INTERVAL_DAY_TIME:
{
HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime();
if (!value.equals(expected)) {
TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
}
}
break;
case BINARY:
{
byte[] byteArray = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
BytesWritable bytesWritable = (BytesWritable) object;
byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
if (byteArray.length != expected.length) {
TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
}
for (int b = 0; b < byteArray.length; b++) {
if (byteArray[b] != expected[b]) {
TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
}
}
}
break;
default:
throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
}
}
break;
case LIST:
case MAP:
case STRUCT:
case UNION:
throw new Error("Complex types need to be handled separately");
default:
throw new Error("Unknown category " + typeInfo.getCategory());
}
}
use of org.apache.hadoop.hive.serde2.proto.test.Complexpb.Complex in project hive by apache.
the class SemanticAnalyzer method genConversionSelectOperator.
/**
* Generate the conversion SelectOperator that converts the columns into the
* types that are expected by the table_desc.
*/
Operator genConversionSelectOperator(String dest, QB qb, Operator input, TableDesc table_desc, DynamicPartitionCtx dpCtx) throws SemanticException {
StructObjectInspector oi = null;
try {
Deserializer deserializer = table_desc.getDeserializerClass().newInstance();
SerDeUtils.initializeSerDe(deserializer, conf, table_desc.getProperties(), null);
oi = (StructObjectInspector) deserializer.getObjectInspector();
} catch (Exception e) {
throw new SemanticException(e);
}
// Check column number
List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING);
ArrayList<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos();
int inColumnCnt = rowFields.size();
int outColumnCnt = tableFields.size();
if (dynPart && dpCtx != null) {
outColumnCnt += dpCtx.getNumDPCols();
}
// The numbers of input columns and output columns should match for regular query
if (!updating(dest) && !deleting(dest) && inColumnCnt != outColumnCnt) {
String reason = "Table " + dest + " has " + outColumnCnt + " columns, but query has " + inColumnCnt + " columns.";
throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(qb.getParseInfo().getDestForClause(dest), reason));
}
// Check column types
boolean converted = false;
int columnNumber = tableFields.size();
ArrayList<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber);
// MetadataTypedColumnsetSerDe does not need type conversions because it
// does the conversion to String by itself.
boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals(MetadataTypedColumnsetSerDe.class);
boolean isLazySimpleSerDe = table_desc.getDeserializerClass().equals(LazySimpleSerDe.class);
if (!isMetaDataSerDe && !deleting(dest)) {
// offset by 1 so that we don't try to convert the ROW__ID
if (updating(dest)) {
expressions.add(new ExprNodeColumnDesc(rowFields.get(0).getType(), rowFields.get(0).getInternalName(), "", true));
}
// here only deals with non-partition columns. We deal with partition columns next
for (int i = 0; i < columnNumber; i++) {
int rowFieldsOffset = updating(dest) ? i + 1 : i;
ObjectInspector tableFieldOI = tableFields.get(i).getFieldObjectInspector();
TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI);
TypeInfo rowFieldTypeInfo = rowFields.get(rowFieldsOffset).getType();
ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(rowFieldsOffset).getInternalName(), "", false, rowFields.get(rowFieldsOffset).isSkewedCol());
// Thus, we still keep the conversion.
if (!tableFieldTypeInfo.equals(rowFieldTypeInfo)) {
// need to do some conversions here
converted = true;
if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
// cannot convert to complex types
column = null;
} else {
column = ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
}
if (column == null) {
String reason = "Cannot convert column " + i + " from " + rowFieldTypeInfo + " to " + tableFieldTypeInfo + ".";
throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(qb.getParseInfo().getDestForClause(dest), reason));
}
}
expressions.add(column);
}
}
// deal with dynamic partition columns: convert ExprNodeDesc type to String??
if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) {
// DP columns starts with tableFields.size()
for (int i = tableFields.size() + (updating(dest) ? 1 : 0); i < rowFields.size(); ++i) {
TypeInfo rowFieldTypeInfo = rowFields.get(i).getType();
ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(i).getInternalName(), "", true);
expressions.add(column);
}
// converted = true; // [TODO]: should we check & convert type to String and set it to true?
}
if (converted) {
// add the select operator
RowResolver rowResolver = new RowResolver();
ArrayList<String> colNames = new ArrayList<String>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < expressions.size(); i++) {
String name = getColumnInternalName(i);
rowResolver.put("", name, new ColumnInfo(name, expressions.get(i).getTypeInfo(), "", false));
colNames.add(name);
colExprMap.put(name, expressions.get(i));
}
input = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(expressions, colNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
input.setColumnExprMap(colExprMap);
}
return input;
}
Aggregations