
Example 91 with PrimitiveTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

From the class ColumnStatsAutoGatherContext, method replaceSelectOperatorProcess.

/**
   * @param operator : the select operator in the analyze statement
   * @param input : the operator right before FS in the insert overwrite statement
   * @throws HiveException 
   */
private void replaceSelectOperatorProcess(SelectOperator operator, Operator<? extends OperatorDesc> input) throws HiveException {
    RowSchema selRS = operator.getSchema();
    ArrayList<ColumnInfo> signature = new ArrayList<>();
    OpParseContext inputCtx = sa.opParseCtx.get(input);
    RowResolver inputRR = inputCtx.getRowResolver();
    ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
    ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    ArrayList<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    // 1. deal with non-partition columns
    for (int i = 0; i < this.columns.size(); i++) {
        ColumnInfo col = columns.get(i);
        ExprNodeDesc exprNodeDesc = new ExprNodeColumnDesc(col);
        colList.add(exprNodeDesc);
        String internalName = selRS.getColumnNames().get(i);
        columnNames.add(internalName);
        columnExprMap.put(internalName, exprNodeDesc);
        signature.add(selRS.getSignature().get(i));
    }
    // if there is any partition column (in static partition or dynamic
    // partition or mixed case)
    int dynamicPartBegin = -1;
    for (int i = 0; i < partitionColumns.size(); i++) {
        ExprNodeDesc exprNodeDesc = null;
        String partColName = partitionColumns.get(i).getName();
        // 2. deal with static partition columns
        if (partSpec != null && partSpec.containsKey(partColName) && partSpec.get(partColName) != null) {
            if (dynamicPartBegin > 0) {
                throw new SemanticException("Dynamic partition columns should not come before static partition columns.");
            }
            exprNodeDesc = new ExprNodeConstantDesc(partSpec.get(partColName));
            TypeInfo srcType = exprNodeDesc.getTypeInfo();
            TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
            if (!srcType.equals(destType)) {
                // This may be possible when srcType is string but destType is integer
                exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
            }
        } else {
            // 3. dynamic partition columns
            dynamicPartBegin++;
            ColumnInfo col = columns.get(this.columns.size() + dynamicPartBegin);
            TypeInfo srcType = col.getType();
            TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
            exprNodeDesc = new ExprNodeColumnDesc(col);
            if (!srcType.equals(destType)) {
                exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
            }
        }
        colList.add(exprNodeDesc);
        String internalName = selRS.getColumnNames().get(this.columns.size() + i);
        columnNames.add(internalName);
        columnExprMap.put(internalName, exprNodeDesc);
        signature.add(selRS.getSignature().get(this.columns.size() + i));
    }
    operator.setConf(new SelectDesc(colList, columnNames));
    operator.setColumnExprMap(columnExprMap);
    selRS.setSignature(signature);
    operator.setSchema(selRS);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
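
The static-partition branch above relies on ParseUtils.createConversionCast to reconcile the constant's type with the destination column type. Below is a minimal sketch of that cast-if-needed pattern; the helper name castPartitionConstant and its inputs are assumptions for illustration, not part of the Hive source.

import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;

// Hypothetical helper: wrap a static partition value in a cast when its inferred
// type (string, for a constant built from partSpec) differs from the target column type.
static ExprNodeDesc castPartitionConstant(String partValue, PrimitiveTypeInfo destType) throws SemanticException {
    ExprNodeDesc constant = new ExprNodeConstantDesc(partValue); // inferred as string
    if (!constant.getTypeInfo().equals(destType)) {
        constant = ParseUtils.createConversionCast(constant, destType);
    }
    return constant;
}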

Example 92 with PrimitiveTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

From the class GenericUDFUtils, method deriveInType.

// Based on update() above.
public static TypeInfo deriveInType(List<ExprNodeDesc> children) {
    TypeInfo returnType = null;
    for (ExprNodeDesc node : children) {
        TypeInfo ti = node.getTypeInfo();
        if (ti.getCategory() == Category.PRIMITIVE && ((PrimitiveTypeInfo) ti).getPrimitiveCategory() == PrimitiveCategory.VOID) {
            continue;
        }
        if (returnType == null) {
            returnType = ti;
            continue;
        }
        if (returnType == ti) {
            continue;
        }
        TypeInfo commonTypeInfo = FunctionRegistry.getCommonClass(returnType, ti);
        if (commonTypeInfo == null) {
            return null;
        }
        returnType = updateCommonTypeForDecimal(commonTypeInfo, ti, returnType);
    }
    return returnType;
}
Also used : ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)
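
A hedged usage sketch of deriveInType: for an int constant and a double constant, FunctionRegistry's common-class rules should widen the result to double. The values and types below are made up for illustration.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Illustration only: VOID children are skipped, and differing primitive types are
// reconciled through FunctionRegistry.getCommonClass inside deriveInType.
List<ExprNodeDesc> children = Arrays.<ExprNodeDesc>asList(
        new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 1),
        new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, 2.0));
TypeInfo common = GenericUDFUtils.deriveInType(children); // expected to be double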

Example 93 with PrimitiveTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

From the class DruidOutputFormat, method getHiveRecordWriter.

@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    final String segmentGranularity = tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY) != null ? tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY) : HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_INDEXING_GRANULARITY);
    final String dataSource = tableProperties.getProperty(Constants.DRUID_DATA_SOURCE);
    final String segmentDirectory = tableProperties.getProperty(Constants.DRUID_SEGMENT_DIRECTORY) != null ? tableProperties.getProperty(Constants.DRUID_SEGMENT_DIRECTORY) : HiveConf.getVar(jc, HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY);
    final HdfsDataSegmentPusherConfig hdfsDataSegmentPusherConfig = new HdfsDataSegmentPusherConfig();
    hdfsDataSegmentPusherConfig.setStorageDirectory(segmentDirectory);
    final DataSegmentPusher hdfsDataSegmentPusher = new HdfsDataSegmentPusher(hdfsDataSegmentPusherConfig, jc, DruidStorageHandlerUtils.JSON_MAPPER);
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularity.valueOf(segmentGranularity), QueryGranularity.fromString(tableProperties.getProperty(Constants.DRUID_QUERY_GRANULARITY) == null ? "NONE" : tableProperties.getProperty(Constants.DRUID_QUERY_GRANULARITY)), null);
    final String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    if (StringUtils.isEmpty(columnNameProperty) || StringUtils.isEmpty(columnTypeProperty)) {
        throw new IllegalStateException(String.format("List of columns names [%s] or columns type [%s] is/are not present", columnNameProperty, columnTypeProperty));
    }
    ArrayList<String> columnNames = new ArrayList<String>();
    for (String name : columnNameProperty.split(",")) {
        columnNames.add(name);
    }
    if (!columnNames.contains(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
        throw new IllegalStateException("Timestamp column (' " + DruidTable.DEFAULT_TIMESTAMP_COLUMN + "') not specified in create table; list of columns is : " + tableProperties.getProperty(serdeConstants.LIST_COLUMNS));
    }
    ArrayList<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    // Default, all columns that are not metrics or timestamp, are treated as dimensions
    final List<DimensionSchema> dimensions = new ArrayList<>();
    ImmutableList.Builder<AggregatorFactory> aggregatorFactoryBuilder = ImmutableList.builder();
    for (int i = 0; i < columnTypes.size(); i++) {
        PrimitiveTypeInfo f = (PrimitiveTypeInfo) columnTypes.get(i);
        AggregatorFactory af;
        switch(f.getPrimitiveCategory()) {
            case BYTE:
            case SHORT:
            case INT:
            case LONG:
                af = new LongSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
                break;
            case FLOAT:
            case DOUBLE:
            case DECIMAL:
                af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
                break;
            case TIMESTAMP:
                String tColumnName = columnNames.get(i);
                if (!tColumnName.equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN) && !tColumnName.equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME)) {
                    throw new IOException("Dimension " + tColumnName + " does not have STRING type: " + f.getPrimitiveCategory());
                }
                continue;
            default:
                // Dimension
                String dColumnName = columnNames.get(i);
                if (PrimitiveObjectInspectorUtils.getPrimitiveGrouping(f.getPrimitiveCategory()) != PrimitiveGrouping.STRING_GROUP) {
                    throw new IOException("Dimension " + dColumnName + " does not have STRING type: " + f.getPrimitiveCategory());
                }
                dimensions.add(new StringDimensionSchema(dColumnName));
                continue;
        }
        aggregatorFactoryBuilder.add(af);
    }
    List<AggregatorFactory> aggregatorFactories = aggregatorFactoryBuilder.build();
    final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(DruidTable.DEFAULT_TIMESTAMP_COLUMN, "auto", null), new DimensionsSpec(dimensions, Lists.newArrayList(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME), null)));
    Map<String, Object> inputParser = DruidStorageHandlerUtils.JSON_MAPPER.convertValue(inputRowParser, Map.class);
    final DataSchema dataSchema = new DataSchema(Preconditions.checkNotNull(dataSource, "Data source name is null"), inputParser, aggregatorFactories.toArray(new AggregatorFactory[aggregatorFactories.size()]), granularitySpec, DruidStorageHandlerUtils.JSON_MAPPER);
    final String workingPath = jc.get(Constants.DRUID_JOB_WORKING_DIRECTORY);
    final String version = jc.get(Constants.DRUID_SEGMENT_VERSION);
    Integer maxPartitionSize = HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_PARTITION_SIZE);
    String basePersistDirectory = HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_BASE_PERSIST_DIRECTORY);
    if (Strings.isNullOrEmpty(basePersistDirectory)) {
        basePersistDirectory = System.getProperty("java.io.tmpdir");
    }
    Integer maxRowInMemory = HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_ROW_IN_MEMORY);
    RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(maxRowInMemory, null, null, new File(basePersistDirectory, dataSource), new CustomVersioningPolicy(version), null, null, null, null, true, 0, 0, true, null);
    LOG.debug(String.format("running with Data schema [%s] ", dataSchema));
    return new DruidRecordWriter(dataSchema, realtimeTuningConfig, hdfsDataSegmentPusher, maxPartitionSize, new Path(workingPath, SEGMENTS_DESCRIPTOR_DIR_NAME), finalOutPath.getFileSystem(jc));
}
Also used : DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) MapInputRowParser(io.druid.data.input.impl.MapInputRowParser) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) DimensionSchema(io.druid.data.input.impl.DimensionSchema) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TimeAndDimsParseSpec(io.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) TimestampSpec(io.druid.data.input.impl.TimestampSpec) Path(org.apache.hadoop.fs.Path) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) HdfsDataSegmentPusherConfig(io.druid.storage.hdfs.HdfsDataSegmentPusherConfig) IOException(java.io.IOException) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) DataSchema(io.druid.segment.indexing.DataSchema) GranularitySpec(io.druid.segment.indexing.granularity.GranularitySpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) InputRowParser(io.druid.data.input.impl.InputRowParser) CustomVersioningPolicy(io.druid.segment.realtime.plumber.CustomVersioningPolicy) File(java.io.File)
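
For context, a minimal sketch of table properties that would pass the column checks above: the timestamp column must be named __time (DruidTable.DEFAULT_TIMESTAMP_COLUMN), STRING_GROUP columns become dimensions, and numeric columns become sum aggregators. The data source name and column list below are made-up values for illustration.

import java.util.Properties;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.serde.serdeConstants;

// Illustration only (values are assumptions): with these properties, "__time" is the
// Druid timestamp, "page" (string) becomes a dimension, and "added" (bigint) is rolled
// up by a LongSumAggregatorFactory in the switch above.
Properties tableProperties = new Properties();
tableProperties.setProperty(Constants.DRUID_DATA_SOURCE, "wikipedia");
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, "__time,page,added");
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "timestamp,string,bigint");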

Example 94 with PrimitiveTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

From the class TypedBytesSerDe, method deserializeField.

static Object deserializeField(TypedBytesWritableInput in, TypeInfo type, Object reuse) throws IOException {
    // read the type
    Class<? extends Writable> writableType = in.readType();
    if (writableType != null && writableType.isAssignableFrom(NullWritable.class)) {
        // indicates that the recorded value is null
        return null;
    }
    switch(type.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
                switch(ptype.getPrimitiveCategory()) {
                    case VOID:
                        {
                            return null;
                        }
                    case BOOLEAN:
                        {
                            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
                            r = in.readBoolean(r);
                            return r;
                        }
                    case BYTE:
                        {
                            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
                            r = in.readByte(r);
                            return r;
                        }
                    case SHORT:
                        {
                            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
                            r = in.readShort(r);
                            return r;
                        }
                    case INT:
                        {
                            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
                            r = in.readInt(r);
                            return r;
                        }
                    case LONG:
                        {
                            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
                            r = in.readLong(r);
                            return r;
                        }
                    case FLOAT:
                        {
                            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
                            r = in.readFloat(r);
                            return r;
                        }
                    case DOUBLE:
                        {
                            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
                            r = in.readDouble(r);
                            return r;
                        }
                    case STRING:
                        {
                            Text r = reuse == null ? new Text() : (Text) reuse;
                            r = in.readText(r);
                            return r;
                        }
                    default:
                        {
                            throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
                        }
                }
            }
        // Currently, deserialization of complex types is not supported
        case LIST:
        case MAP:
        case STRUCT:
        default:
            {
                throw new RuntimeException("Unsupported category: " + type.getCategory());
            }
    }
}
Also used : FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) NullWritable(org.apache.hadoop.io.NullWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) IntWritable(org.apache.hadoop.io.IntWritable)
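
Every primitive branch above follows the same reuse-or-allocate idiom for Hadoop Writables. A minimal standalone sketch of that idiom for a single type (not part of TypedBytesSerDe):

import org.apache.hadoop.io.IntWritable;

// Reuse the caller-supplied Writable when one is passed in; otherwise allocate a new one.
// Reusing avoids creating a fresh object for every deserialized row.
static IntWritable toIntWritable(int value, Object reuse) {
    IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
    r.set(value);
    return r;
}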

Example 95 with PrimitiveTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

From the class TestDruidSerDe, method serializeObject.

private static void serializeObject(Properties properties, DruidSerDe serDe, Object[] rowObject, DruidWritable druidWritable) throws SerDeException {
    // Build OI with timestamp granularity column
    final List<String> columnNames = new ArrayList<>();
    final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
    List<ObjectInspector> inspectors = new ArrayList<>();
    columnNames.addAll(Utilities.getColumnNames(properties));
    columnNames.add(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME);
    columnTypes.addAll(Lists.transform(Utilities.getColumnTypes(properties), new Function<String, PrimitiveTypeInfo>() {

        @Override
        public PrimitiveTypeInfo apply(String type) {
            return TypeInfoFactory.getPrimitiveTypeInfo(type);
        }
    }));
    columnTypes.add(TypeInfoFactory.getPrimitiveTypeInfo("timestamp"));
    inspectors.addAll(Lists.transform(columnTypes, new Function<PrimitiveTypeInfo, ObjectInspector>() {

        @Override
        public ObjectInspector apply(PrimitiveTypeInfo type) {
            return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type);
        }
    }));
    ObjectInspector inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
    // Serialize
    DruidWritable writable = (DruidWritable) serDe.serialize(rowObject, inspector);
    // Check result
    assertEquals(DRUID_WRITABLE.getValue().size(), writable.getValue().size());
    for (Entry<String, Object> e : DRUID_WRITABLE.getValue().entrySet()) {
        assertEquals(e.getValue(), writable.getValue().get(e.getKey()));
    }
}
Also used : Function(com.google.common.base.Function) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ArrayList(java.util.ArrayList) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
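
The same struct inspector can be built without Guava's Lists.transform. A short sketch under assumed column names and types (made up for illustration):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Illustration only: map each primitive type name to a writable object inspector,
// then wrap the columns in a standard struct inspector for serialization.
List<String> names = Arrays.asList("page", "added", "__time_granularity");
List<ObjectInspector> inspectors = new ArrayList<>();
for (String type : Arrays.asList("string", "bigint", "timestamp")) {
    inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
            TypeInfoFactory.getPrimitiveTypeInfo(type)));
}
ObjectInspector rowInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);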

Aggregations

PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 110
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 41
ArrayList (java.util.ArrayList): 37
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 33
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 26
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 25
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 23
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 20
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 19
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 18
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 18
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 15
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 15
BytesWritable (org.apache.hadoop.io.BytesWritable): 15
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 14
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo): 14
IntWritable (org.apache.hadoop.io.IntWritable): 13
Text (org.apache.hadoop.io.Text): 13
Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category): 11
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 11