
Example 21 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

the class MyTestPrimitiveClass method getPrimitiveTypeInfo.

public static PrimitiveTypeInfo getPrimitiveTypeInfo(int index, ExtraTypeInfo extraTypeInfo) {
    PrimitiveCategory primitiveCategory = getPrimitiveCategory(index);
    String typeName;
    switch(primitiveCategory) {
        case BYTE:
            typeName = "tinyint";
            break;
        case SHORT:
            typeName = "smallint";
            break;
        case LONG:
            typeName = "bigint";
            break;
        case CHAR:
            typeName = String.format("char(%d)", extraTypeInfo.hiveCharMaxLength);
            break;
        case VARCHAR:
            typeName = String.format("varchar(%d)", extraTypeInfo.hiveVarcharMaxLength);
            break;
        case DECIMAL:
            typeName = String.format("decimal(%d,%d)", extraTypeInfo.precision, extraTypeInfo.scale);
            break;
        default:
            // No type name difference or adornment.
            typeName = primitiveCategory.name().toLowerCase();
            break;
    }
    PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
    return primitiveTypeInfo;
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
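
A quick round-trip check of the same idea, using only TypeInfoUtils.getTypeInfoFromTypeString (the call the helper above delegates to); this is a standalone sketch, not part of the Hive test class:

import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeNameRoundTrip {
    public static void main(String[] args) {
        // Qualified type names (length, precision/scale) keep their qualifiers in the parsed TypeInfo.
        String[] names = { "tinyint", "smallint", "bigint", "char(10)", "varchar(20)", "decimal(10,2)" };
        for (String name : names) {
            PrimitiveTypeInfo info =
                    (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(name);
            System.out.println(name + " -> " + info.getPrimitiveCategory() + " / " + info.getTypeName());
        }
    }
}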

Example 22 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

the class DruidSerDe method inferSchema.

/* Select query */
private void inferSchema(SelectQuery query, List<String> columnNames, List<PrimitiveTypeInfo> columnTypes, String address) throws SerDeException {
    // Timestamp column
    columnNames.add(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
    columnTypes.add(TypeInfoFactory.timestampTypeInfo);
    // Dimension columns
    for (DimensionSpec ds : query.getDimensions()) {
        columnNames.add(ds.getOutputName());
        columnTypes.add(TypeInfoFactory.stringTypeInfo);
    }
    // The metric column types are not explicit in the query, so we issue a
    // segment metadata query to Druid to discover them
    SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder();
    builder.dataSource(query.getDataSource());
    builder.merge(true);
    builder.analysisTypes();
    SegmentMetadataQuery metadataQuery = builder.build();
    // Execute query in Druid
    SegmentAnalysis schemaInfo;
    try {
        schemaInfo = submitMetadataRequest(address, metadataQuery);
    } catch (IOException e) {
        throw new SerDeException(e);
    }
    if (schemaInfo == null) {
        throw new SerDeException("Connected to Druid but could not retrieve datasource information");
    }
    for (String metric : query.getMetrics()) {
        columnNames.add(metric);
        columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(schemaInfo.getColumns().get(metric).getType()));
    }
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) SegmentMetadataQuery(io.druid.query.metadata.metadata.SegmentMetadataQuery) SegmentMetadataQueryBuilder(io.druid.query.Druids.SegmentMetadataQueryBuilder) SegmentAnalysis(io.druid.query.metadata.metadata.SegmentAnalysis) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
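
The fixed part of the inferred schema (the timestamp column plus string-typed dimensions) needs no Druid round trip; a minimal sketch of just that part, with hypothetical dimension names standing in for query.getDimensions():

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class FixedDruidSchemaSketch {
    public static void main(String[] args) {
        List<String> columnNames = new ArrayList<>();
        List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
        // The Druid timestamp column always maps to a Hive timestamp.
        columnNames.add("__time"); // stands in for DruidTable.DEFAULT_TIMESTAMP_COLUMN
        columnTypes.add(TypeInfoFactory.timestampTypeInfo);
        // Dimensions always map to Hive strings; "page" and "user" are hypothetical names.
        for (String dim : Arrays.asList("page", "user")) {
            columnNames.add(dim);
            columnTypes.add(TypeInfoFactory.stringTypeInfo);
        }
        System.out.println(columnNames + " -> " + columnTypes);
    }
}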

Example 23 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

the class DruidSerDe method initialize.

@Override
public void initialize(Configuration configuration, Properties properties) throws SerDeException {
    // Init connection properties
    numConnection = HiveConf.getIntVar(configuration, HiveConf.ConfVars.HIVE_DRUID_NUM_HTTP_CONNECTION);
    readTimeout = new Period(HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_HTTP_READ_TIMEOUT));
    final List<String> columnNames = new ArrayList<>();
    final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
    List<ObjectInspector> inspectors = new ArrayList<>();
    // Druid query
    String druidQuery = properties.getProperty(Constants.DRUID_QUERY_JSON);
    if (druidQuery == null) {
        // No Druid query specified. Either the columns and types are declared in the
        // table definition, or we query Druid metadata to infer the schema of
        // the data source (dimensions and metrics).
        if (!org.apache.commons.lang3.StringUtils.isEmpty(properties.getProperty(serdeConstants.LIST_COLUMNS)) && !org.apache.commons.lang3.StringUtils.isEmpty(properties.getProperty(serdeConstants.LIST_COLUMN_TYPES))) {
            columnNames.addAll(Utilities.getColumnNames(properties));
            if (!columnNames.contains(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
                throw new SerDeException("Timestamp column (' " + DruidTable.DEFAULT_TIMESTAMP_COLUMN + "') not specified in create table; list of columns is : " + properties.getProperty(serdeConstants.LIST_COLUMNS));
            }
            columnTypes.addAll(Lists.transform(Utilities.getColumnTypes(properties), new Function<String, PrimitiveTypeInfo>() {

                @Override
                public PrimitiveTypeInfo apply(String type) {
                    return TypeInfoFactory.getPrimitiveTypeInfo(type);
                }
            }));
            inspectors.addAll(Lists.transform(columnTypes, new Function<PrimitiveTypeInfo, ObjectInspector>() {

                @Override
                public ObjectInspector apply(PrimitiveTypeInfo type) {
                    return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type);
                }
            }));
            columns = columnNames.toArray(new String[columnNames.size()]);
            types = columnTypes.toArray(new PrimitiveTypeInfo[columnTypes.size()]);
            inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
        } else {
            String dataSource = properties.getProperty(Constants.DRUID_DATA_SOURCE);
            if (dataSource == null) {
                throw new SerDeException("Druid data source not specified; use " + Constants.DRUID_DATA_SOURCE + " in table properties");
            }
            SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder();
            builder.dataSource(dataSource);
            builder.merge(true);
            builder.analysisTypes();
            SegmentMetadataQuery query = builder.build();
            // Execute query in Druid
            String address = HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
            if (org.apache.commons.lang3.StringUtils.isEmpty(address)) {
                throw new SerDeException("Druid broker address not specified in configuration");
            }
            // Infer schema
            SegmentAnalysis schemaInfo;
            try {
                schemaInfo = submitMetadataRequest(address, query);
            } catch (IOException e) {
                throw new SerDeException(e);
            }
            for (Entry<String, ColumnAnalysis> columnInfo : schemaInfo.getColumns().entrySet()) {
                if (columnInfo.getKey().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
                    // Special handling for timestamp column
                    // field name
                    columnNames.add(columnInfo.getKey());
                    // field type
                    PrimitiveTypeInfo type = TypeInfoFactory.timestampTypeInfo;
                    columnTypes.add(type);
                    inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
                    continue;
                }
                // field name
                columnNames.add(columnInfo.getKey());
                // field type
                PrimitiveTypeInfo type = DruidSerDeUtils.convertDruidToHiveType(
                        columnInfo.getValue().getType());
                columnTypes.add(type);
                inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
            }
            columns = columnNames.toArray(new String[columnNames.size()]);
            types = columnTypes.toArray(new PrimitiveTypeInfo[columnTypes.size()]);
            inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
        }
    } else {
        // Query is specified, we can extract the results schema from the query
        Query<?> query;
        try {
            query = DruidStorageHandlerUtils.JSON_MAPPER.readValue(druidQuery, Query.class);
            switch(query.getType()) {
                case Query.TIMESERIES:
                    inferSchema((TimeseriesQuery) query, columnNames, columnTypes);
                    break;
                case Query.TOPN:
                    inferSchema((TopNQuery) query, columnNames, columnTypes);
                    break;
                case Query.SELECT:
                    String address = HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
                    if (org.apache.commons.lang3.StringUtils.isEmpty(address)) {
                        throw new SerDeException("Druid broker address not specified in configuration");
                    }
                    inferSchema((SelectQuery) query, columnNames, columnTypes, address);
                    break;
                case Query.GROUP_BY:
                    inferSchema((GroupByQuery) query, columnNames, columnTypes);
                    break;
                default:
                    throw new SerDeException("Not supported Druid query");
            }
        } catch (Exception e) {
            throw new SerDeException(e);
        }
        columns = new String[columnNames.size()];
        types = new PrimitiveTypeInfo[columnNames.size()];
        for (int i = 0; i < columnTypes.size(); ++i) {
            columns[i] = columnNames.get(i);
            types[i] = columnTypes.get(i);
            inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(types[i]));
        }
        inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("DruidSerDe initialized with\n" + "\t columns: " + columnNames + "\n\t types: " + columnTypes);
    }
}
Also used : HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) SelectQuery(io.druid.query.select.SelectQuery) TimeseriesQuery(io.druid.query.timeseries.TimeseriesQuery) Query(io.druid.query.Query) TopNQuery(io.druid.query.topn.TopNQuery) SegmentMetadataQuery(io.druid.query.metadata.metadata.SegmentMetadataQuery) GroupByQuery(io.druid.query.groupby.GroupByQuery) ArrayList(java.util.ArrayList) Period(org.joda.time.Period) IOException(java.io.IOException) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) Function(com.google.common.base.Function) SegmentMetadataQuery(io.druid.query.metadata.metadata.SegmentMetadataQuery) ColumnAnalysis(io.druid.query.metadata.metadata.ColumnAnalysis) SegmentMetadataQueryBuilder(io.druid.query.Druids.SegmentMetadataQueryBuilder) SegmentAnalysis(io.druid.query.metadata.metadata.SegmentAnalysis) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
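
The branch that reads the declared columns follows a common SerDe pattern: resolve each declared type name to its PrimitiveTypeInfo, wrap it in a writable primitive ObjectInspector, and combine everything into a standard struct inspector. A condensed sketch of that pattern with hypothetical column names and types:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class StructInspectorSketch {
    public static void main(String[] args) {
        // Hypothetical column declaration, e.g. from table properties.
        List<String> columnNames = Arrays.asList("__time", "page", "added");
        List<String> typeNames = Arrays.asList("timestamp", "string", "bigint");
        List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
        List<ObjectInspector> inspectors = new ArrayList<>();
        for (String typeName : typeNames) {
            // Resolve the declared type name to its PrimitiveTypeInfo singleton.
            PrimitiveTypeInfo type = TypeInfoFactory.getPrimitiveTypeInfo(typeName);
            columnTypes.add(type);
            // Writable inspectors are what the SerDe exposes for each primitive field.
            inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
        }
        StructObjectInspector rowInspector =
                ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
        System.out.println(rowInspector.getTypeName());
    }
}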

Example 24 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

the class ObjectStore method getPartitionNamesPrunedByExprNoTxn.

/**
   * Gets the partition names from a table, pruned using an expression.
   * @param table Table.
   * @param expr Expression.
   * @param defaultPartName Default partition name from job config, if any.
   * @param maxParts Maximum number of partition names to return.
   * @param result The resulting names.
   * @return Whether the result contains any unknown partitions.
   */
private boolean getPartitionNamesPrunedByExprNoTxn(Table table, byte[] expr, String defaultPartName, short maxParts, List<String> result) throws MetaException {
    result.addAll(getPartitionNamesNoTxn(table.getDbName(), table.getTableName(), maxParts));
    List<String> columnNames = new ArrayList<String>();
    List<PrimitiveTypeInfo> typeInfos = new ArrayList<PrimitiveTypeInfo>();
    for (FieldSchema fs : table.getPartitionKeys()) {
        columnNames.add(fs.getName());
        typeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()));
    }
    if (defaultPartName == null || defaultPartName.isEmpty()) {
        defaultPartName = HiveConf.getVar(getConf(), HiveConf.ConfVars.DEFAULTPARTITIONNAME);
    }
    return expressionProxy.filterPartitionsByExpr(columnNames, typeInfos, expr, defaultPartName, result);
}
Also used : MFieldSchema(org.apache.hadoop.hive.metastore.model.MFieldSchema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
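
The only PrimitiveTypeInfo work here is turning each partition key's type-name string into its TypeInfo; a brief sketch of that conversion with hypothetical partition columns:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartitionKeyTypes {
    public static void main(String[] args) {
        // Hypothetical partition keys, as the metastore would describe them.
        List<FieldSchema> partitionKeys = new ArrayList<>();
        partitionKeys.add(new FieldSchema("ds", "string", null));
        partitionKeys.add(new FieldSchema("hr", "int", null));
        List<String> columnNames = new ArrayList<>();
        List<PrimitiveTypeInfo> typeInfos = new ArrayList<>();
        for (FieldSchema fs : partitionKeys) {
            columnNames.add(fs.getName());
            // Partition key types are plain type-name strings; resolve each to its PrimitiveTypeInfo.
            typeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()));
        }
        System.out.println(columnNames + " -> " + typeInfos);
    }
}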

Example 25 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

the class VectorPartitionConversion method isImplicitVectorColumnConversion.

public static boolean isImplicitVectorColumnConversion(TypeInfo fromTypeInfo, TypeInfo toTypeInfo) {
    if (fromTypeInfo.getCategory() == Category.PRIMITIVE && toTypeInfo.getCategory() == Category.PRIMITIVE) {
        PrimitiveCategory fromPrimitiveCategory = ((PrimitiveTypeInfo) fromTypeInfo).getPrimitiveCategory();
        PrimitiveCategory toPrimitiveCategory = ((PrimitiveTypeInfo) toTypeInfo).getPrimitiveCategory();
        PrimitiveCategory[] toPrimitiveCategories = implicitPrimitiveMap.get(fromPrimitiveCategory);
        if (toPrimitiveCategories != null) {
            for (PrimitiveCategory candidatePrimitiveCategory : toPrimitiveCategories) {
                if (candidatePrimitiveCategory == toPrimitiveCategory) {
                    return true;
                }
            }
        }
        return false;
    }
    return false;
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
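
A short usage sketch; the import path for VectorPartitionConversion (org.apache.hadoop.hive.ql.exec.vector) and whether int to bigint is registered in implicitPrimitiveMap are assumptions here, not something this snippet guarantees:

import org.apache.hadoop.hive.ql.exec.vector.VectorPartitionConversion;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ImplicitConversionCheck {
    public static void main(String[] args) {
        // Result depends on how implicitPrimitiveMap is populated in the Hive build in use.
        boolean implicit = VectorPartitionConversion.isImplicitVectorColumnConversion(
                TypeInfoFactory.intTypeInfo, TypeInfoFactory.longTypeInfo);
        System.out.println("int -> bigint implicit: " + implicit);
    }
}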

Aggregations

PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) 110
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 41
ArrayList (java.util.ArrayList) 37
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) 33
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) 26
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) 25
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 23
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 20
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) 19
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 18
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) 18
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal) 15
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 15
BytesWritable (org.apache.hadoop.io.BytesWritable) 15
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) 14
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) 14
IntWritable (org.apache.hadoop.io.IntWritable) 13
Text (org.apache.hadoop.io.Text) 13
Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) 11
BooleanWritable (org.apache.hadoop.io.BooleanWritable) 11