Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
Class ColumnStatsAutoGatherContext, method replaceSelectOperatorProcess.
/**
 * @param operator : the select operator in the analyze statement
 * @param input : the operator right before FS in the insert overwrite statement
 * @throws HiveException
 */
private void replaceSelectOperatorProcess(SelectOperator operator, Operator<? extends OperatorDesc> input)
    throws HiveException {
  RowSchema selRS = operator.getSchema();
  ArrayList<ColumnInfo> signature = new ArrayList<>();
  OpParseContext inputCtx = sa.opParseCtx.get(input);
  RowResolver inputRR = inputCtx.getRowResolver();
  ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
  ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  ArrayList<String> columnNames = new ArrayList<String>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
  // 1. deal with non-partition columns
  for (int i = 0; i < this.columns.size(); i++) {
    ColumnInfo col = columns.get(i);
    ExprNodeDesc exprNodeDesc = new ExprNodeColumnDesc(col);
    colList.add(exprNodeDesc);
    String internalName = selRS.getColumnNames().get(i);
    columnNames.add(internalName);
    columnExprMap.put(internalName, exprNodeDesc);
    signature.add(selRS.getSignature().get(i));
  }
  // if there is any partition column (static partition, dynamic partition, or mixed case)
  int dynamicPartBegin = -1;
  for (int i = 0; i < partitionColumns.size(); i++) {
    ExprNodeDesc exprNodeDesc = null;
    String partColName = partitionColumns.get(i).getName();
    // 2. deal with static partition columns
    if (partSpec != null && partSpec.containsKey(partColName) && partSpec.get(partColName) != null) {
      if (dynamicPartBegin > 0) {
        throw new SemanticException("Dynamic partition columns should not come before static partition columns.");
      }
      exprNodeDesc = new ExprNodeConstantDesc(partSpec.get(partColName));
      TypeInfo srcType = exprNodeDesc.getTypeInfo();
      TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
      if (!srcType.equals(destType)) {
        // This can happen when srcType is string but destType is integer
        exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
      }
    } else {
      // 3. dynamic partition columns
      dynamicPartBegin++;
      ColumnInfo col = columns.get(this.columns.size() + dynamicPartBegin);
      TypeInfo srcType = col.getType();
      TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
      exprNodeDesc = new ExprNodeColumnDesc(col);
      if (!srcType.equals(destType)) {
        exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
      }
    }
    colList.add(exprNodeDesc);
    String internalName = selRS.getColumnNames().get(this.columns.size() + i);
    columnNames.add(internalName);
    columnExprMap.put(internalName, exprNodeDesc);
    signature.add(selRS.getSignature().get(this.columns.size() + i));
  }
  operator.setConf(new SelectDesc(colList, columnNames));
  operator.setColumnExprMap(columnExprMap);
  selRS.setSignature(signature);
  operator.setSchema(selRS);
}
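For context, the following is a minimal standalone sketch of the static-partition cast performed above. It is not part of the Hive sources; the class name PartitionCastSketch and the sample values ("2017", an int destination column) are invented for illustration. A partition-spec value arrives as a string constant, and when the destination column's type differs, it is converted with ParseUtils.createConversionCast to the destination PrimitiveTypeInfo.

import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartitionCastSketch {
  public static void main(String[] args) throws SemanticException {
    // Hypothetical static partition value, e.g. from PARTITION (year='2017')
    ExprNodeDesc constant = new ExprNodeConstantDesc("2017");
    TypeInfo srcType = constant.getTypeInfo();        // string
    TypeInfo destType = TypeInfoFactory.intTypeInfo;  // assume the partition column is declared as int
    if (!srcType.equals(destType)) {
      // Same cast call as in replaceSelectOperatorProcess above
      constant = ParseUtils.createConversionCast(constant, (PrimitiveTypeInfo) destType);
    }
    // Prints the resulting cast expression (e.g. UDFToInteger('2017'), depending on Hive version)
    System.out.println(constant.getExprString());
  }
}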
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
Class GenericUDFUtils, method deriveInType.
// Based on update() above.
public static TypeInfo deriveInType(List<ExprNodeDesc> children) {
  TypeInfo returnType = null;
  for (ExprNodeDesc node : children) {
    TypeInfo ti = node.getTypeInfo();
    if (ti.getCategory() == Category.PRIMITIVE
        && ((PrimitiveTypeInfo) ti).getPrimitiveCategory() == PrimitiveCategory.VOID) {
      continue;
    }
    if (returnType == null) {
      returnType = ti;
      continue;
    }
    if (returnType == ti) {
      continue;
    }
    TypeInfo commonTypeInfo = FunctionRegistry.getCommonClass(returnType, ti);
    if (commonTypeInfo == null) {
      return null;
    }
    returnType = updateCommonTypeForDecimal(commonTypeInfo, ti, returnType);
  }
  return returnType;
}
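A hedged usage sketch (not from the Hive sources; the class name DeriveInTypeSketch and the constant values are illustrative): feed two constants of different primitive types into GenericUDFUtils.deriveInType and print the common TypeInfo it derives, which should be a type both inputs convert to (double for an int and a double, based on FunctionRegistry.getCommonClass).

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class DeriveInTypeSketch {
  public static void main(String[] args) {
    // Two children with different primitive types: int and double
    List<ExprNodeDesc> children = Arrays.asList(
        new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 1),
        new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, 2.5d));
    TypeInfo common = GenericUDFUtils.deriveInType(children);
    // Expected to print the common type, i.e. double
    System.out.println(common);
  }
}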
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
Class DruidOutputFormat, method getHiveRecordWriter.
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
    Progressable progress) throws IOException {
  final String segmentGranularity =
      tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY) != null
          ? tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY)
          : HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_INDEXING_GRANULARITY);
  final String dataSource = tableProperties.getProperty(Constants.DRUID_DATA_SOURCE);
  final String segmentDirectory =
      tableProperties.getProperty(Constants.DRUID_SEGMENT_DIRECTORY) != null
          ? tableProperties.getProperty(Constants.DRUID_SEGMENT_DIRECTORY)
          : HiveConf.getVar(jc, HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY);
  final HdfsDataSegmentPusherConfig hdfsDataSegmentPusherConfig = new HdfsDataSegmentPusherConfig();
  hdfsDataSegmentPusherConfig.setStorageDirectory(segmentDirectory);
  final DataSegmentPusher hdfsDataSegmentPusher =
      new HdfsDataSegmentPusher(hdfsDataSegmentPusherConfig, jc, DruidStorageHandlerUtils.JSON_MAPPER);
  final GranularitySpec granularitySpec = new UniformGranularitySpec(
      Granularity.valueOf(segmentGranularity),
      QueryGranularity.fromString(
          tableProperties.getProperty(Constants.DRUID_QUERY_GRANULARITY) == null
              ? "NONE"
              : tableProperties.getProperty(Constants.DRUID_QUERY_GRANULARITY)),
      null);
  final String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
  final String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  if (StringUtils.isEmpty(columnNameProperty) || StringUtils.isEmpty(columnTypeProperty)) {
    throw new IllegalStateException(String.format(
        "List of columns names [%s] or columns type [%s] is/are not present",
        columnNameProperty, columnTypeProperty));
  }
  ArrayList<String> columnNames = new ArrayList<String>();
  for (String name : columnNameProperty.split(",")) {
    columnNames.add(name);
  }
  if (!columnNames.contains(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
    throw new IllegalStateException("Timestamp column (' " + DruidTable.DEFAULT_TIMESTAMP_COLUMN
        + "') not specified in create table; list of columns is : "
        + tableProperties.getProperty(serdeConstants.LIST_COLUMNS));
  }
  ArrayList<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  // By default, all columns that are not metrics or timestamp are treated as dimensions
  final List<DimensionSchema> dimensions = new ArrayList<>();
  ImmutableList.Builder<AggregatorFactory> aggregatorFactoryBuilder = ImmutableList.builder();
  for (int i = 0; i < columnTypes.size(); i++) {
    PrimitiveTypeInfo f = (PrimitiveTypeInfo) columnTypes.get(i);
    AggregatorFactory af;
    switch (f.getPrimitiveCategory()) {
      case BYTE:
      case SHORT:
      case INT:
      case LONG:
        af = new LongSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
        break;
      case FLOAT:
      case DOUBLE:
      case DECIMAL:
        af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
        break;
      case TIMESTAMP:
        String tColumnName = columnNames.get(i);
        if (!tColumnName.equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)
            && !tColumnName.equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME)) {
          throw new IOException("Dimension " + tColumnName + " does not have STRING type: "
              + f.getPrimitiveCategory());
        }
        continue;
      default:
        // Dimension
        String dColumnName = columnNames.get(i);
        if (PrimitiveObjectInspectorUtils.getPrimitiveGrouping(f.getPrimitiveCategory())
            != PrimitiveGrouping.STRING_GROUP) {
          throw new IOException("Dimension " + dColumnName + " does not have STRING type: "
              + f.getPrimitiveCategory());
        }
        dimensions.add(new StringDimensionSchema(dColumnName));
        continue;
    }
    aggregatorFactoryBuilder.add(af);
  }
  List<AggregatorFactory> aggregatorFactories = aggregatorFactoryBuilder.build();
  final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(
      new TimestampSpec(DruidTable.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
      new DimensionsSpec(dimensions,
          Lists.newArrayList(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME), null)));
  Map<String, Object> inputParser =
      DruidStorageHandlerUtils.JSON_MAPPER.convertValue(inputRowParser, Map.class);
  final DataSchema dataSchema = new DataSchema(
      Preconditions.checkNotNull(dataSource, "Data source name is null"), inputParser,
      aggregatorFactories.toArray(new AggregatorFactory[aggregatorFactories.size()]),
      granularitySpec, DruidStorageHandlerUtils.JSON_MAPPER);
  final String workingPath = jc.get(Constants.DRUID_JOB_WORKING_DIRECTORY);
  final String version = jc.get(Constants.DRUID_SEGMENT_VERSION);
  Integer maxPartitionSize = HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_PARTITION_SIZE);
  String basePersistDirectory = HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_BASE_PERSIST_DIRECTORY);
  if (Strings.isNullOrEmpty(basePersistDirectory)) {
    basePersistDirectory = System.getProperty("java.io.tmpdir");
  }
  Integer maxRowInMemory = HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_ROW_IN_MEMORY);
  RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(maxRowInMemory, null, null,
      new File(basePersistDirectory, dataSource), new CustomVersioningPolicy(version), null, null,
      null, null, true, 0, 0, true, null);
  LOG.debug(String.format("running with Data schema [%s] ", dataSchema));
  return new DruidRecordWriter(dataSchema, realtimeTuningConfig, hdfsDataSegmentPusher,
      maxPartitionSize, new Path(workingPath, SEGMENTS_DESCRIPTOR_DIR_NAME),
      finalOutPath.getFileSystem(jc));
}
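The following is a small, self-contained sketch of the column classification step above, loosely mirroring the switch in getHiveRecordWriter: parse a Hive column-type string, cast each entry to PrimitiveTypeInfo, and decide whether the column would become a Druid metric (numeric group), the timestamp column, or a dimension. The class name, the column names "__time", "page", "clicks", and the type string are hypothetical, and the grouping-based classification is a simplification of the real category-by-category switch.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class DruidColumnClassifierSketch {
  public static void main(String[] args) {
    // Hypothetical LIST_COLUMNS / LIST_COLUMN_TYPES values, for illustration only
    List<String> names = Arrays.asList("__time", "page", "clicks");
    List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString("timestamp,string,bigint");
    for (int i = 0; i < types.size(); i++) {
      PrimitiveTypeInfo p = (PrimitiveTypeInfo) types.get(i);
      PrimitiveGrouping grouping =
          PrimitiveObjectInspectorUtils.getPrimitiveGrouping(p.getPrimitiveCategory());
      String role;
      if (grouping == PrimitiveGrouping.NUMERIC_GROUP) {
        role = "metric (Long/DoubleSumAggregatorFactory)";
      } else if (grouping == PrimitiveGrouping.DATE_GROUP) {
        role = "timestamp column";
      } else {
        role = "dimension (StringDimensionSchema)";
      }
      System.out.println(names.get(i) + " -> " + role);
    }
  }
}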
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
Class TypedBytesSerDe, method deserializeField.
static Object deserializeField(TypedBytesWritableInput in, TypeInfo type, Object reuse) throws IOException {
  // read the type
  Class<? extends Writable> writableType = in.readType();
  if (writableType != null && writableType.isAssignableFrom(NullWritable.class)) {
    // indicates that the recorded value is null
    return null;
  }
  switch (type.getCategory()) {
    case PRIMITIVE: {
      PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
      switch (ptype.getPrimitiveCategory()) {
        case VOID: {
          return null;
        }
        case BOOLEAN: {
          BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
          r = in.readBoolean(r);
          return r;
        }
        case BYTE: {
          ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
          r = in.readByte(r);
          return r;
        }
        case SHORT: {
          ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
          r = in.readShort(r);
          return r;
        }
        case INT: {
          IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
          r = in.readInt(r);
          return r;
        }
        case LONG: {
          LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
          r = in.readLong(r);
          return r;
        }
        case FLOAT: {
          FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
          r = in.readFloat(r);
          return r;
        }
        case DOUBLE: {
          DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
          r = in.readDouble(r);
          return r;
        }
        case STRING: {
          Text r = reuse == null ? new Text() : (Text) reuse;
          r = in.readText(r);
          return r;
        }
        default: {
          throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
        }
      }
    }
    // Currently, deserialization of complex types is not supported
    case LIST:
    case MAP:
    case STRUCT:
    default: {
      throw new RuntimeException("Unsupported category: " + type.getCategory());
    }
  }
}
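A hedged, standalone sketch of the dispatch pattern deserializeField relies on: inspect a TypeInfo's category, downcast PRIMITIVE types to PrimitiveTypeInfo, and switch on the PrimitiveCategory to choose a matching Hadoop Writable class. The helper writableFor is a made-up name for illustration only, it covers just a few categories, and it does not read typedbytes streams.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class WritableDispatchSketch {
  // Hypothetical helper: maps a primitive TypeInfo to the Writable class used to hold its value
  static Class<? extends Writable> writableFor(TypeInfo type) {
    if (type.getCategory() != Category.PRIMITIVE) {
      throw new RuntimeException("Unsupported category: " + type.getCategory());
    }
    PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
    switch (ptype.getPrimitiveCategory()) {
      case BOOLEAN: return BooleanWritable.class;
      case INT:     return IntWritable.class;
      case LONG:    return LongWritable.class;
      case STRING:  return Text.class;
      default:
        throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
    }
  }

  public static void main(String[] args) {
    System.out.println(writableFor(TypeInfoUtils.getTypeInfoFromTypeString("int")));    // IntWritable
    System.out.println(writableFor(TypeInfoUtils.getTypeInfoFromTypeString("string"))); // Text
  }
}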
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
Class TestDruidSerDe, method serializeObject.
private static void serializeObject(Properties properties, DruidSerDe serDe, Object[] rowObject,
    DruidWritable druidWritable) throws SerDeException {
  // Build OI with timestamp granularity column
  final List<String> columnNames = new ArrayList<>();
  final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
  List<ObjectInspector> inspectors = new ArrayList<>();
  columnNames.addAll(Utilities.getColumnNames(properties));
  columnNames.add(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME);
  columnTypes.addAll(Lists.transform(Utilities.getColumnTypes(properties),
      new Function<String, PrimitiveTypeInfo>() {
        @Override
        public PrimitiveTypeInfo apply(String type) {
          return TypeInfoFactory.getPrimitiveTypeInfo(type);
        }
      }));
  columnTypes.add(TypeInfoFactory.getPrimitiveTypeInfo("timestamp"));
  inspectors.addAll(Lists.transform(columnTypes,
      new Function<PrimitiveTypeInfo, ObjectInspector>() {
        @Override
        public ObjectInspector apply(PrimitiveTypeInfo type) {
          return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type);
        }
      }));
  ObjectInspector inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
  // Serialize
  DruidWritable writable = (DruidWritable) serDe.serialize(rowObject, inspector);
  // Check result
  assertEquals(DRUID_WRITABLE.getValue().size(), writable.getValue().size());
  for (Entry<String, Object> e : DRUID_WRITABLE.getValue().entrySet()) {
    assertEquals(e.getValue(), writable.getValue().get(e.getKey()));
  }
}
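A brief hedged sketch of the PrimitiveTypeInfo to ObjectInspector mapping the test uses when assembling its row inspector: look up a PrimitiveTypeInfo by type name and ask PrimitiveObjectInspectorFactory for the matching writable ObjectInspector. The class name and the list of type names are illustrative.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class WritableInspectorSketch {
  public static void main(String[] args) {
    // Sample type names; any valid Hive primitive type name works here
    List<String> typeNames = Arrays.asList("string", "bigint", "timestamp");
    for (String name : typeNames) {
      PrimitiveTypeInfo typeInfo = TypeInfoFactory.getPrimitiveTypeInfo(name);
      ObjectInspector oi =
          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
      System.out.println(name + " -> " + oi.getTypeName());
    }
  }
}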