Example 21 with Complex

Use of org.apache.hadoop.hive.serde2.proto.test.Complexpb.Complex in project hive by apache.

The class MetadataTypedColumnsetSerDe, method serialize:

@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    if (objInspector.getCategory() != Category.STRUCT) {
        throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
    }
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < fields.size(); i++) {
        if (i > 0) {
            sb.append(separator);
        }
        Object column = soi.getStructFieldData(obj, fields.get(i));
        if (fields.get(i).getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
            // For primitive object, serialize to plain string
            sb.append(column == null ? nullString : column.toString());
        } else {
            // For complex object, serialize to JSON format
            sb.append(SerDeUtils.getJSONString(column, fields.get(i).getFieldObjectInspector()));
        }
    }
    serializeCache.set(sb.toString());
    return serializeCache;
}
Also used : MetadataListStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MetadataListStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
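
For context, here is a minimal standalone sketch (no Hive dependency) that mirrors the serialize() loop above: primitive fields are joined as plain strings with a separator, and nulls are replaced by a null marker. The comma separator and "\N" marker below are illustrative stand-ins for the SerDe's configured separator and nullString fields (Hive's textual SerDes commonly default to the \001 delimiter and the \N null string).

import java.util.Arrays;
import java.util.List;

public class ColumnsetSerializeSketch {

    // Mirrors the primitive branch of serialize(): join columns with a
    // separator, substituting nullString for null values. Complex values
    // would instead go through a JSON encoder (SerDeUtils.getJSONString).
    public static String serialize(List<Object> row, char separator, String nullString) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < row.size(); i++) {
            if (i > 0) {
                sb.append(separator);
            }
            Object column = row.get(i);
            sb.append(column == null ? nullString : column.toString());
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        List<Object> row = Arrays.asList(1, "hello", null);
        // Prints: 1,hello,\N
        System.out.println(serialize(row, ',', "\\N"));
    }
}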

Example 22 with Complex

Use of org.apache.hadoop.hive.serde2.proto.test.Complexpb.Complex in project hive by apache.

The class StatsUtils, method getAvgColLenOf:

/**
 * Get the average column length (raw data size) of variable length data types
 * @param conf
 *          - hive conf
 * @param oi
 *          - object inspector
 * @param colType
 *          - column type
 * @return raw data size
 */
public static long getAvgColLenOf(HiveConf conf, ObjectInspector oi, String colType) {
    long configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH);
    String colTypeLowCase = colType.toLowerCase();
    if (colTypeLowCase.equals(serdeConstants.STRING_TYPE_NAME)) {
        // constant string projection Ex: select "hello" from table
        if (oi instanceof ConstantObjectInspector) {
            ConstantObjectInspector coi = (ConstantObjectInspector) oi;
            // if writable constant is null then return size 0
            Object constantValue = coi.getWritableConstantValue();
            return constantValue == null ? 0 : constantValue.toString().length();
        } else if (oi instanceof StringObjectInspector) {
            // return the variable length from config
            return configVarLen;
        }
    } else if (colTypeLowCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
        // constant varchar projection
        if (oi instanceof ConstantObjectInspector) {
            ConstantObjectInspector coi = (ConstantObjectInspector) oi;
            // if writable constant is null then return size 0
            Object constantValue = coi.getWritableConstantValue();
            return constantValue == null ? 0 : constantValue.toString().length();
        } else if (oi instanceof HiveVarcharObjectInspector) {
            VarcharTypeInfo type = (VarcharTypeInfo) ((HiveVarcharObjectInspector) oi).getTypeInfo();
            return type.getLength();
        }
    } else if (colTypeLowCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
        // constant char projection
        if (oi instanceof ConstantObjectInspector) {
            ConstantObjectInspector coi = (ConstantObjectInspector) oi;
            // if writable constant is null then return size 0
            Object constantValue = coi.getWritableConstantValue();
            return constantValue == null ? 0 : constantValue.toString().length();
        } else if (oi instanceof HiveCharObjectInspector) {
            CharTypeInfo type = (CharTypeInfo) ((HiveCharObjectInspector) oi).getTypeInfo();
            return type.getLength();
        }
    } else if (colTypeLowCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
        // constant byte arrays
        if (oi instanceof ConstantObjectInspector) {
            ConstantObjectInspector coi = (ConstantObjectInspector) oi;
            // if writable constant is null then return size 0
            BytesWritable constantValue = (BytesWritable) coi.getWritableConstantValue();
            return constantValue == null ? 0 : constantValue.getLength();
        } else if (oi instanceof BinaryObjectInspector) {
            // return the variable length from config
            return configVarLen;
        }
    } else {
        // complex types (map, list, struct, union)
        return getSizeOfComplexTypes(conf, oi);
    }
    throw new IllegalArgumentException("Size requested for unknown type: " + colType + " OI: " + oi.getTypeName());
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) WritableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector)
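
A hedged usage sketch of the string branch above: a constant projection reports the literal's exact length, while an ordinary string column falls back to hive.stats.max.variable.length (100 by default). This assumes the Hive ql and serde2 jars on the classpath and the factory signatures of Hive 2.x/3.x.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.Text;

public class AvgColLenSketch {
    public static void main(String[] args) {
        HiveConf conf = new HiveConf();

        // Constant string projection, e.g. SELECT 'hello' FROM t: the
        // ConstantObjectInspector branch returns the literal's length (5).
        long constLen = StatsUtils.getAvgColLenOf(conf,
            PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
                TypeInfoFactory.stringTypeInfo, new Text("hello")),
            "string");

        // Ordinary string column: the StringObjectInspector branch returns
        // the configured maximum variable length (default 100).
        long varLen = StatsUtils.getAvgColLenOf(conf,
            PrimitiveObjectInspectorFactory.writableStringObjectInspector,
            "string");

        System.out.println("constant=" + constLen + " variable=" + varLen);
    }
}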

Example 23 with Complex

Use of org.apache.hadoop.hive.serde2.proto.test.Complexpb.Complex in project hive by apache.

The class StatsUtils, method getColStatisticsFromExpression:

/**
 * Get column statistics from an expression node
 * @param conf
 *          - hive conf
 * @param parentStats
 *          - parent statistics
 * @param end
 *          - expression node
 * @return column statistics
 */
public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statistics parentStats, ExprNodeDesc end) {
    if (end == null) {
        return null;
    }
    String colName = null;
    String colType = null;
    double avgColSize = 0;
    long countDistincts = 0;
    long numNulls = 0;
    ObjectInspector oi = end.getWritableObjectInspector();
    long numRows = parentStats.getNumRows();
    if (end instanceof ExprNodeColumnDesc) {
        // column projection
        ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
        colName = encd.getColumn();
        if (encd.getIsPartitionColOrVirtualCol()) {
            ColStatistics colStats = parentStats.getColumnStatisticsFromColName(colName);
            if (colStats != null) {
                /* If statistics for the column already exist use it. */
                return colStats.clone();
            }
            // virtual columns
            colType = encd.getTypeInfo().getTypeName();
            countDistincts = numRows;
        } else {
            // clone the column stats and return
            ColStatistics result = parentStats.getColumnStatisticsFromColName(colName);
            if (result != null) {
                return result.clone();
            }
            return null;
        }
    } else if (end instanceof ExprNodeConstantDesc) {
        // constant projection
        ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end;
        colName = encd.getName();
        colType = encd.getTypeString();
        if (encd.getValue() == null) {
            // null projection
            numNulls = numRows;
        } else {
            countDistincts = 1;
        }
    } else if (end instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end;
        colName = engfd.getName();
        colType = engfd.getTypeString();
        // If it is a widening cast, we do not change NDV, min, max
        if (isWideningCast(engfd) && engfd.getChildren().get(0) instanceof ExprNodeColumnDesc) {
            // cast on single column
            ColStatistics stats = parentStats.getColumnStatisticsFromColName(engfd.getCols().get(0));
            if (stats != null) {
                ColStatistics newStats;
                newStats = stats.clone();
                newStats.setColumnName(colName);
                colType = colType.toLowerCase();
                newStats.setColumnType(colType);
                newStats.setAvgColLen(getAvgColLenOf(conf, oi, colType));
                return newStats;
            }
        }
        // fallback to default
        countDistincts = getNDVFor(engfd, numRows, parentStats);
    } else if (end instanceof ExprNodeColumnListDesc) {
        // column list
        ExprNodeColumnListDesc encd = (ExprNodeColumnListDesc) end;
        colName = Joiner.on(",").join(encd.getCols());
        colType = serdeConstants.LIST_TYPE_NAME;
        countDistincts = numRows;
    } else if (end instanceof ExprNodeFieldDesc) {
        // field within complex type
        ExprNodeFieldDesc enfd = (ExprNodeFieldDesc) end;
        colName = enfd.getFieldName();
        colType = enfd.getTypeString();
        countDistincts = numRows;
    } else {
        throw new IllegalArgumentException("not supported expr type " + end.getClass());
    }
    colType = colType.toLowerCase();
    avgColSize = getAvgColLenOf(conf, oi, colType);
    ColStatistics colStats = new ColStatistics(colName, colType);
    colStats.setAvgColLen(avgColSize);
    colStats.setCountDistint(countDistincts);
    colStats.setNumNulls(numNulls);
    return colStats;
}
Also used : WritableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector) WritableByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) WritableTimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampLocalTZObjectInspector) StandardConstantListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector) StandardConstantMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) WritableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector) WritableShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector) WritableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector) WritableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) WritableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector) WritableDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector) StandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) WritableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) WritableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector) StandardConstantStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector) StandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) WritableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeColumnListDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ColStatistics(org.apache.hadoop.hive.ql.plan.ColStatistics) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
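
A hedged sketch of the constant-projection branch above (SELECT 'hello' FROM t): a non-null constant yields countDistinct = 1, zero nulls, and an average column length equal to the literal's length. The Statistics(numRows, dataSize) constructor and the row/size figures are assumptions for illustration; adjust to the Statistics API of your Hive version.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ConstantColStatsSketch {
    public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Assumed parent statistics: 1000 rows, 100 KB data size.
        Statistics parentStats = new Statistics(1000L, 100000L);

        ExprNodeConstantDesc constant =
            new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "hello");
        ColStatistics cs =
            StatsUtils.getColStatisticsFromExpression(conf, parentStats, constant);

        // Expected: avgColLen=5.0 countDistinct=1 numNulls=0.
        // getCountDistint() is the accessor's actual (misspelled) name in Hive.
        System.out.println("avgColLen=" + cs.getAvgColLen()
            + " countDistinct=" + cs.getCountDistint()
            + " numNulls=" + cs.getNumNulls());
    }
}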

Example 24 with Complex

Use of org.apache.hadoop.hive.serde2.proto.test.Complexpb.Complex in project hive by apache.

The class VectorVerifyFast, method doVerifyDeserializeRead:

public static void doVerifyDeserializeRead(DeserializeRead deserializeRead, TypeInfo typeInfo, Object object, boolean isNull) throws IOException {
    if (isNull) {
        if (object != null) {
            TestCase.fail("Field reports null but object is not null (class " + object.getClass().getName() + ", " + object.toString() + ")");
        }
        return;
    } else if (object == null) {
        TestCase.fail("Field report not null but object is null");
    }
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
                switch(primitiveTypeInfo.getPrimitiveCategory()) {
                    case BOOLEAN:
                        {
                            boolean value = deserializeRead.currentBoolean;
                            if (!(object instanceof BooleanWritable)) {
                                TestCase.fail("Boolean expected writable not Boolean");
                            }
                            boolean expected = ((BooleanWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
                            }
                        }
                        break;
                    case BYTE:
                        {
                            byte value = deserializeRead.currentByte;
                            if (!(object instanceof ByteWritable)) {
                                TestCase.fail("Byte expected writable not Byte");
                            }
                            byte expected = ((ByteWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
                            }
                        }
                        break;
                    case SHORT:
                        {
                            short value = deserializeRead.currentShort;
                            if (!(object instanceof ShortWritable)) {
                                TestCase.fail("Short expected writable not Short");
                            }
                            short expected = ((ShortWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
                            }
                        }
                        break;
                    case INT:
                        {
                            int value = deserializeRead.currentInt;
                            if (!(object instanceof IntWritable)) {
                                TestCase.fail("Integer expected writable not Integer");
                            }
                            int expected = ((IntWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
                            }
                        }
                        break;
                    case LONG:
                        {
                            long value = deserializeRead.currentLong;
                            if (!(object instanceof LongWritable)) {
                                TestCase.fail("Long expected writable not Long");
                            }
                            long expected = ((LongWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
                            }
                        }
                        break;
                    case FLOAT:
                        {
                            float value = deserializeRead.currentFloat;
                            if (!(object instanceof FloatWritable)) {
                                TestCase.fail("Float expected writable not Float");
                            }
                            float expected = ((FloatWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
                            }
                        }
                        break;
                    case DOUBLE:
                        {
                            double value = deserializeRead.currentDouble;
                            if (!(object instanceof DoubleWritable)) {
                                TestCase.fail("Double expected writable not Double");
                            }
                            double expected = ((DoubleWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
                            }
                        }
                        break;
                    case STRING:
                        {
                            byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                            Text text = new Text(stringBytes);
                            String string = text.toString();
                            String expected = ((Text) object).toString();
                            if (!string.equals(expected)) {
                                TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
                            }
                        }
                        break;
                    case CHAR:
                        {
                            byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                            Text text = new Text(stringBytes);
                            String string = text.toString();
                            HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
                            HiveChar expected = ((HiveCharWritable) object).getHiveChar();
                            if (!hiveChar.equals(expected)) {
                                TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
                            }
                        }
                        break;
                    case VARCHAR:
                        {
                            byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                            Text text = new Text(stringBytes);
                            String string = text.toString();
                            HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
                            HiveVarchar expected = ((HiveVarcharWritable) object).getHiveVarchar();
                            if (!hiveVarchar.equals(expected)) {
                                TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
                            }
                        }
                        break;
                    case DECIMAL:
                        {
                            HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
                            if (value == null) {
                                TestCase.fail("Decimal field evaluated to NULL");
                            }
                            HiveDecimal expected = ((HiveDecimalWritable) object).getHiveDecimal();
                            if (!value.equals(expected)) {
                                DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
                                int precision = decimalTypeInfo.getPrecision();
                                int scale = decimalTypeInfo.getScale();
                                TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
                            }
                        }
                        break;
                    case DATE:
                        {
                            Date value = deserializeRead.currentDateWritable.get();
                            Date expected = ((DateWritable) object).get();
                            if (!value.equals(expected)) {
                                TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                            }
                        }
                        break;
                    case TIMESTAMP:
                        {
                            Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
                            Timestamp expected = ((TimestampWritable) object).getTimestamp();
                            if (!value.equals(expected)) {
                                TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                            }
                        }
                        break;
                    case INTERVAL_YEAR_MONTH:
                        {
                            HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
                            HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth();
                            if (!value.equals(expected)) {
                                TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                            }
                        }
                        break;
                    case INTERVAL_DAY_TIME:
                        {
                            HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
                            HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime();
                            if (!value.equals(expected)) {
                                TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                            }
                        }
                        break;
                    case BINARY:
                        {
                            byte[] byteArray = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                            BytesWritable bytesWritable = (BytesWritable) object;
                            byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
                            if (byteArray.length != expected.length) {
                                TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
                            }
                            for (int b = 0; b < byteArray.length; b++) {
                                if (byteArray[b] != expected[b]) {
                                    TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
                                }
                            }
                        }
                        break;
                    default:
                        throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
                }
            }
            break;
        case LIST:
        case MAP:
        case STRUCT:
        case UNION:
            throw new Error("Complex types need to be handled separately");
        default:
            throw new Error("Unknown category " + typeInfo.getCategory());
    }
}
Also used : HiveChar(org.apache.hadoop.hive.common.type.HiveChar) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) Timestamp(java.sql.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) Text(org.apache.hadoop.io.Text) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) Date(java.sql.Date) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) BooleanWritable(org.apache.hadoop.io.BooleanWritable)
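
The per-type pattern above distills to: require the expected object to be the matching Writable, then compare values and fail with a descriptive message. Here is a standalone sketch for the INT case (verifyInt is an illustrative helper, not a Hive API; the int argument stands in for deserializeRead.currentInt):

import org.apache.hadoop.io.IntWritable;

public class VerifyPrimitiveSketch {

    // Same shape as the INT case above: type-check the expected Writable,
    // unwrap it, and fail on any value mismatch.
    static void verifyInt(int deserializedValue, Object expectedObject) {
        if (!(expectedObject instanceof IntWritable)) {
            throw new AssertionError("Integer expected writable not Integer");
        }
        int expected = ((IntWritable) expectedObject).get();
        if (deserializedValue != expected) {
            throw new AssertionError("Int field mismatch (expected " + expected
                + " found " + deserializedValue + ")");
        }
    }

    public static void main(String[] args) {
        verifyInt(42, new IntWritable(42)); // passes silently
        try {
            verifyInt(42, new IntWritable(7)); // mismatch
        } catch (AssertionError e) {
            System.out.println(e.getMessage());
        }
    }
}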

Example 25 with Complex

Use of org.apache.hadoop.hive.serde2.proto.test.Complexpb.Complex in project hive by apache.

The class SemanticAnalyzer, method genConversionSelectOperator:

/**
 * Generate the conversion SelectOperator that converts the columns into the
 * types that are expected by the table_desc.
 */
Operator genConversionSelectOperator(String dest, QB qb, Operator input, TableDesc table_desc, DynamicPartitionCtx dpCtx) throws SemanticException {
    StructObjectInspector oi = null;
    try {
        Deserializer deserializer = table_desc.getDeserializerClass().newInstance();
        SerDeUtils.initializeSerDe(deserializer, conf, table_desc.getProperties(), null);
        oi = (StructObjectInspector) deserializer.getObjectInspector();
    } catch (Exception e) {
        throw new SemanticException(e);
    }
    // Check column number
    List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
    boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING);
    ArrayList<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos();
    int inColumnCnt = rowFields.size();
    int outColumnCnt = tableFields.size();
    if (dynPart && dpCtx != null) {
        outColumnCnt += dpCtx.getNumDPCols();
    }
    // The numbers of input columns and output columns should match for regular query
    if (!updating(dest) && !deleting(dest) && inColumnCnt != outColumnCnt) {
        String reason = "Table " + dest + " has " + outColumnCnt + " columns, but query has " + inColumnCnt + " columns.";
        throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(qb.getParseInfo().getDestForClause(dest), reason));
    }
    // Check column types
    boolean converted = false;
    int columnNumber = tableFields.size();
    ArrayList<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber);
    // MetadataTypedColumnsetSerDe does not need type conversions because it
    // does the conversion to String by itself.
    boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals(MetadataTypedColumnsetSerDe.class);
    boolean isLazySimpleSerDe = table_desc.getDeserializerClass().equals(LazySimpleSerDe.class);
    if (!isMetaDataSerDe && !deleting(dest)) {
        // offset by 1 so that we don't try to convert the ROW__ID
        if (updating(dest)) {
            expressions.add(new ExprNodeColumnDesc(rowFields.get(0).getType(), rowFields.get(0).getInternalName(), "", true));
        }
        // here only deals with non-partition columns. We deal with partition columns next
        for (int i = 0; i < columnNumber; i++) {
            int rowFieldsOffset = updating(dest) ? i + 1 : i;
            ObjectInspector tableFieldOI = tableFields.get(i).getFieldObjectInspector();
            TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI);
            TypeInfo rowFieldTypeInfo = rowFields.get(rowFieldsOffset).getType();
            ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(rowFieldsOffset).getInternalName(), "", false, rowFields.get(rowFieldsOffset).isSkewedCol());
            // LazySimpleSerDe can convert any type to string itself;
            // thus, we still keep the conversion for mismatched types.
            if (!tableFieldTypeInfo.equals(rowFieldTypeInfo)) {
                // need to do some conversions here
                converted = true;
                if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
                    // cannot convert to complex types
                    column = null;
                } else {
                    column = ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
                }
                if (column == null) {
                    String reason = "Cannot convert column " + i + " from " + rowFieldTypeInfo + " to " + tableFieldTypeInfo + ".";
                    throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(qb.getParseInfo().getDestForClause(dest), reason));
                }
            }
            expressions.add(column);
        }
    }
    // deal with dynamic partition columns: convert ExprNodeDesc type to String??
    if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) {
        // DP columns starts with tableFields.size()
        for (int i = tableFields.size() + (updating(dest) ? 1 : 0); i < rowFields.size(); ++i) {
            TypeInfo rowFieldTypeInfo = rowFields.get(i).getType();
            ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(i).getInternalName(), "", true);
            expressions.add(column);
        }
    // converted = true; // [TODO]: should we check & convert type to String and set it to true?
    }
    if (converted) {
        // add the select operator
        RowResolver rowResolver = new RowResolver();
        ArrayList<String> colNames = new ArrayList<String>();
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        for (int i = 0; i < expressions.size(); i++) {
            String name = getColumnInternalName(i);
            rowResolver.put("", name, new ColumnInfo(name, expressions.get(i).getTypeInfo(), "", false));
            colNames.add(name);
            colExprMap.put(name, expressions.get(i));
        }
        input = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(expressions, colNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
        input.setColumnExprMap(colExprMap);
    }
    return input;
}
Also used : StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) PatternSyntaxException(java.util.regex.PatternSyntaxException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(java.security.AccessControlException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
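
A hedged sketch of the type check at the heart of the conversion loop above: when a row column's TypeInfo differs from the table field's, a conversion cast is needed, and only primitive targets can be cast. The real code wraps the column via ParseUtils.createConversionCast; checkConversion below is an illustrative helper.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ConversionCheckSketch {

    static String checkConversion(TypeInfo rowType, TypeInfo tableType) {
        if (tableType.equals(rowType)) {
            return "no cast needed";
        }
        if (tableType.getCategory() != Category.PRIMITIVE) {
            // Mirrors the "cannot convert to complex types" branch above.
            return "cannot convert to " + tableType.getTypeName();
        }
        return "cast " + rowType.getTypeName() + " -> " + tableType.getTypeName();
    }

    public static void main(String[] args) {
        // int column inserted into a bigint table field: a cast is added.
        System.out.println(checkConversion(TypeInfoFactory.intTypeInfo,
            TypeInfoFactory.longTypeInfo));
        // Matching types pass through unchanged.
        System.out.println(checkConversion(TypeInfoFactory.stringTypeInfo,
            TypeInfoFactory.stringTypeInfo));
    }
}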

Aggregations

PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 12
ArrayList (java.util.ArrayList): 10
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 10
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 9
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 8
ConstantObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector): 7
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 5
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 5
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 5
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 5
FloatWritable (org.apache.hadoop.io.FloatWritable): 5
IntWritable (org.apache.hadoop.io.IntWritable): 5
LongWritable (org.apache.hadoop.io.LongWritable): 5
Text (org.apache.hadoop.io.Text): 5
Path (org.apache.hadoop.fs.Path): 3
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 3
CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException): 3
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 3
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 3
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable): 3