Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class MyTestPrimitiveClass, method getPrimitiveTypeInfo.
public static PrimitiveTypeInfo getPrimitiveTypeInfo(int index, ExtraTypeInfo extraTypeInfo) {
  PrimitiveCategory primitiveCategory = getPrimitiveCategory(index);
  String typeName;
  switch (primitiveCategory) {
    case BYTE:
      typeName = "tinyint";
      break;
    case SHORT:
      typeName = "smallint";
      break;
    case LONG:
      typeName = "bigint";
      break;
    case CHAR:
      typeName = String.format("char(%d)", extraTypeInfo.hiveCharMaxLength);
      break;
    case VARCHAR:
      typeName = String.format("varchar(%d)", extraTypeInfo.hiveVarcharMaxLength);
      break;
    case DECIMAL:
      typeName = String.format("decimal(%d,%d)", extraTypeInfo.precision, extraTypeInfo.scale);
      break;
    default:
      // No type name difference or adornment.
      typeName = primitiveCategory.name().toLowerCase();
      break;
  }
  PrimitiveTypeInfo primitiveTypeInfo =
      (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
  return primitiveTypeInfo;
}
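
To see what the adorned name buys you, here is a minimal round-trip check using only the serde2 typeinfo API shown above (the "decimal(10,2)" literal is an illustrative value, not from the source): the parsed result is a parameterized DecimalTypeInfo carrying the qualifiers, not just the base category.

import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeNameRoundTrip {
  public static void main(String[] args) {
    // Parse an adorned type name back into a typed TypeInfo.
    PrimitiveTypeInfo info =
        (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("decimal(10,2)");
    // The qualifiers survive the round trip.
    DecimalTypeInfo decimal = (DecimalTypeInfo) info;
    System.out.println(decimal.getTypeName()); // decimal(10,2)
    System.out.println(decimal.precision() + "," + decimal.scale()); // 10,2
  }
}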
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class DruidSerDe, method inferSchema.
/* Select query */
private void inferSchema(SelectQuery query, List<String> columnNames,
    List<PrimitiveTypeInfo> columnTypes, String address) throws SerDeException {
  // Timestamp column
  columnNames.add(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
  columnTypes.add(TypeInfoFactory.timestampTypeInfo);
  // Dimension columns
  for (DimensionSpec ds : query.getDimensions()) {
    columnNames.add(ds.getOutputName());
    columnTypes.add(TypeInfoFactory.stringTypeInfo);
  }
  // The type of the metric columns is not explicit in the query, so we need to
  // emit a segment metadata query to discover it.
  SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder();
  builder.dataSource(query.getDataSource());
  builder.merge(true);
  builder.analysisTypes();
  SegmentMetadataQuery metadataQuery = builder.build();
  // Execute query in Druid
  SegmentAnalysis schemaInfo;
  try {
    schemaInfo = submitMetadataRequest(address, metadataQuery);
  } catch (IOException e) {
    throw new SerDeException(e);
  }
  if (schemaInfo == null) {
    throw new SerDeException("Connected to Druid but could not retrieve datasource information");
  }
  // Metric columns
  for (String metric : query.getMetrics()) {
    columnNames.add(metric);
    columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(schemaInfo.getColumns().get(metric).getType()));
  }
}
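
For context on the metric branch, a minimal sketch of the kind of mapping DruidSerDeUtils.convertDruidToHiveType performs; the helper name druidToHive and the exact case set here are assumptions for illustration, not the real utility's contract.

import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Illustrative sketch only; the actual DruidSerDeUtils.convertDruidToHiveType
// may handle more (or different) Druid type names.
static PrimitiveTypeInfo druidToHive(String druidTypeName) {
  switch (druidTypeName.toUpperCase()) {
    case "LONG":
      return TypeInfoFactory.longTypeInfo;
    case "FLOAT":
      return TypeInfoFactory.floatTypeInfo;
    default:
      // Dimensions and anything unrecognized fall back to string.
      return TypeInfoFactory.stringTypeInfo;
  }
}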
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class DruidSerDe, method initialize.
@Override
public void initialize(Configuration configuration, Properties properties) throws SerDeException {
  // Init connection properties
  numConnection = HiveConf.getIntVar(configuration, HiveConf.ConfVars.HIVE_DRUID_NUM_HTTP_CONNECTION);
  readTimeout = new Period(HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_HTTP_READ_TIMEOUT));
  final List<String> columnNames = new ArrayList<>();
  final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
  List<ObjectInspector> inspectors = new ArrayList<>();
  // Druid query
  String druidQuery = properties.getProperty(Constants.DRUID_QUERY_JSON);
  if (druidQuery == null) {
    // No query specified. Use the column names and types declared in the table
    // definition if present; otherwise issue a segment metadata query to infer
    // the schema of the data source (dimensions and metrics).
    if (!org.apache.commons.lang3.StringUtils.isEmpty(properties.getProperty(serdeConstants.LIST_COLUMNS))
        && !org.apache.commons.lang3.StringUtils.isEmpty(properties.getProperty(serdeConstants.LIST_COLUMN_TYPES))) {
      columnNames.addAll(Utilities.getColumnNames(properties));
      if (!columnNames.contains(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
        throw new SerDeException("Timestamp column ('" + DruidTable.DEFAULT_TIMESTAMP_COLUMN
            + "') not specified in create table; list of columns is: "
            + properties.getProperty(serdeConstants.LIST_COLUMNS));
      }
      columnTypes.addAll(Lists.transform(Utilities.getColumnTypes(properties),
          new Function<String, PrimitiveTypeInfo>() {
            @Override
            public PrimitiveTypeInfo apply(String type) {
              return TypeInfoFactory.getPrimitiveTypeInfo(type);
            }
          }));
      inspectors.addAll(Lists.transform(columnTypes,
          new Function<PrimitiveTypeInfo, ObjectInspector>() {
            @Override
            public ObjectInspector apply(PrimitiveTypeInfo type) {
              return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type);
            }
          }));
      columns = columnNames.toArray(new String[columnNames.size()]);
      types = columnTypes.toArray(new PrimitiveTypeInfo[columnTypes.size()]);
      inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
    } else {
      String dataSource = properties.getProperty(Constants.DRUID_DATA_SOURCE);
      if (dataSource == null) {
        throw new SerDeException("Druid data source not specified; use "
            + Constants.DRUID_DATA_SOURCE + " in table properties");
      }
      SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder();
      builder.dataSource(dataSource);
      builder.merge(true);
      builder.analysisTypes();
      SegmentMetadataQuery query = builder.build();
      // Execute query in Druid
      String address = HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
      if (org.apache.commons.lang3.StringUtils.isEmpty(address)) {
        throw new SerDeException("Druid broker address not specified in configuration");
      }
      // Infer schema
      SegmentAnalysis schemaInfo;
      try {
        schemaInfo = submitMetadataRequest(address, query);
      } catch (IOException e) {
        throw new SerDeException(e);
      }
      for (Entry<String, ColumnAnalysis> columnInfo : schemaInfo.getColumns().entrySet()) {
        if (columnInfo.getKey().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
          // Special handling for timestamp column
          // field name
          columnNames.add(columnInfo.getKey());
          // field type
          PrimitiveTypeInfo type = TypeInfoFactory.timestampTypeInfo;
          columnTypes.add(type);
          inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
          continue;
        }
        // field name
        columnNames.add(columnInfo.getKey());
        // field type
        PrimitiveTypeInfo type = DruidSerDeUtils.convertDruidToHiveType(columnInfo.getValue().getType());
        columnTypes.add(type);
        inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
      }
      columns = columnNames.toArray(new String[columnNames.size()]);
      types = columnTypes.toArray(new PrimitiveTypeInfo[columnTypes.size()]);
      inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
    }
  } else {
    // Query is specified, so we can extract the result schema from it
    Query<?> query;
    try {
      query = DruidStorageHandlerUtils.JSON_MAPPER.readValue(druidQuery, Query.class);
      switch (query.getType()) {
        case Query.TIMESERIES:
          inferSchema((TimeseriesQuery) query, columnNames, columnTypes);
          break;
        case Query.TOPN:
          inferSchema((TopNQuery) query, columnNames, columnTypes);
          break;
        case Query.SELECT:
          String address = HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
          if (org.apache.commons.lang3.StringUtils.isEmpty(address)) {
            throw new SerDeException("Druid broker address not specified in configuration");
          }
          inferSchema((SelectQuery) query, columnNames, columnTypes, address);
          break;
        case Query.GROUP_BY:
          inferSchema((GroupByQuery) query, columnNames, columnTypes);
          break;
        default:
          throw new SerDeException("Not supported Druid query");
      }
    } catch (Exception e) {
      throw new SerDeException(e);
    }
    columns = new String[columnNames.size()];
    types = new PrimitiveTypeInfo[columnNames.size()];
    for (int i = 0; i < columnTypes.size(); ++i) {
      columns[i] = columnNames.get(i);
      types[i] = columnTypes.get(i);
      inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(types[i]));
    }
    inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("DruidSerDe initialized with\n\t columns: " + columnNames + "\n\t types: " + columnTypes);
  }
}
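
The anonymous Guava Function adapters in the declared-columns branch above predate Java 8; with streams, the same column-type and inspector wiring reads as below. A behavioral sketch assuming the surrounding columnTypes and inspectors lists; it is not how the project's source is written.

import java.util.stream.Collectors;

// Equivalent of the two Lists.transform(...) calls above.
columnTypes.addAll(Utilities.getColumnTypes(properties).stream()
    .map(TypeInfoFactory::getPrimitiveTypeInfo)
    .collect(Collectors.toList()));
inspectors.addAll(columnTypes.stream()
    .map(PrimitiveObjectInspectorFactory::getPrimitiveWritableObjectInspector)
    .collect(Collectors.toList()));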
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class ObjectStore, method getPartitionNamesPrunedByExprNoTxn.
/**
 * Gets the partition names from a table, pruned using an expression.
 * @param table Table.
 * @param expr Expression.
 * @param defaultPartName Default partition name from job config, if any.
 * @param maxParts Maximum number of partition names to return.
 * @param result The resulting names.
 * @return Whether the result contains any unknown partitions.
 */
private boolean getPartitionNamesPrunedByExprNoTxn(Table table, byte[] expr,
    String defaultPartName, short maxParts, List<String> result) throws MetaException {
  result.addAll(getPartitionNamesNoTxn(table.getDbName(), table.getTableName(), maxParts));
  List<String> columnNames = new ArrayList<String>();
  List<PrimitiveTypeInfo> typeInfos = new ArrayList<PrimitiveTypeInfo>();
  for (FieldSchema fs : table.getPartitionKeys()) {
    columnNames.add(fs.getName());
    typeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()));
  }
  if (defaultPartName == null || defaultPartName.isEmpty()) {
    defaultPartName = HiveConf.getVar(getConf(), HiveConf.ConfVars.DEFAULTPARTITIONNAME);
  }
  return expressionProxy.filterPartitionsByExpr(columnNames, typeInfos, expr, defaultPartName, result);
}
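
To make the loop concrete: for a table partitioned by (ds string, hr int) — hypothetical keys, not from the source — it yields the parallel name/type lists that expressionProxy.filterPartitionsByExpr consumes.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Hypothetical partition keys for illustration.
List<FieldSchema> partitionKeys = Arrays.asList(
    new FieldSchema("ds", "string", null),
    new FieldSchema("hr", "int", null));
List<String> columnNames = new ArrayList<String>();
List<PrimitiveTypeInfo> typeInfos = new ArrayList<PrimitiveTypeInfo>();
for (FieldSchema fs : partitionKeys) {
  columnNames.add(fs.getName());                                     // [ds, hr]
  typeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(fs.getType())); // [string, int]
}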
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorPartitionConversion, method isImplicitVectorColumnConversion.
public static boolean isImplicitVectorColumnConversion(TypeInfo fromTypeInfo, TypeInfo toTypeInfo) {
  if (fromTypeInfo.getCategory() == Category.PRIMITIVE && toTypeInfo.getCategory() == Category.PRIMITIVE) {
    PrimitiveCategory fromPrimitiveCategory = ((PrimitiveTypeInfo) fromTypeInfo).getPrimitiveCategory();
    PrimitiveCategory toPrimitiveCategory = ((PrimitiveTypeInfo) toTypeInfo).getPrimitiveCategory();
    PrimitiveCategory[] toPrimitiveCategories = implicitPrimitiveMap.get(fromPrimitiveCategory);
    if (toPrimitiveCategories != null) {
      for (PrimitiveCategory candidatePrimitiveCategory : toPrimitiveCategories) {
        if (candidatePrimitiveCategory == toPrimitiveCategory) {
          return true;
        }
      }
    }
    return false;
  }
  return false;
}
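
For reference, one plausible way a table like implicitPrimitiveMap gets populated: lossless widenings keyed by source category. The entries below are illustrative assumptions; the real map in VectorPartitionConversion defines its own set.

import java.util.EnumMap;
import java.util.Map;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;

// Illustrative entries only; the actual map may differ.
private static final Map<PrimitiveCategory, PrimitiveCategory[]> implicitPrimitiveMap =
    new EnumMap<>(PrimitiveCategory.class);
static {
  implicitPrimitiveMap.put(PrimitiveCategory.BYTE,
      new PrimitiveCategory[] { PrimitiveCategory.SHORT, PrimitiveCategory.INT, PrimitiveCategory.LONG });
  implicitPrimitiveMap.put(PrimitiveCategory.SHORT,
      new PrimitiveCategory[] { PrimitiveCategory.INT, PrimitiveCategory.LONG });
  implicitPrimitiveMap.put(PrimitiveCategory.INT,
      new PrimitiveCategory[] { PrimitiveCategory.LONG });
}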