Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project cdap by caskdata.
In class ObjectInspectorFactory, the method getReflectionObjectInspectorNoCache:
private static ObjectInspector getReflectionObjectInspectorNoCache(Type t) {
  if (t instanceof GenericArrayType) {
    GenericArrayType at = (GenericArrayType) t;
    return getStandardListObjectInspector(getReflectionObjectInspector(at.getGenericComponentType()));
  }
  Map<TypeVariable, Type> genericTypes = null;
  if (t instanceof ParameterizedType) {
    ParameterizedType pt = (ParameterizedType) t;
    Type rawType = pt.getRawType();
    // Collection?
    if (Collection.class.isAssignableFrom((Class<?>) rawType)) {
      return getStandardListObjectInspector(getReflectionObjectInspector(pt.getActualTypeArguments()[0]));
    }
    // Map?
    if (Map.class.isAssignableFrom((Class<?>) rawType)) {
      return getStandardMapObjectInspector(getReflectionObjectInspector(pt.getActualTypeArguments()[0]),
                                           getReflectionObjectInspector(pt.getActualTypeArguments()[1]));
    }
    // Otherwise convert t to its raw type so we fall into the Class handling below.
    t = rawType;
    ImmutableMap.Builder<TypeVariable, Type> builder = ImmutableMap.builder();
    for (int i = 0; i < pt.getActualTypeArguments().length; i++) {
      builder.put(((Class<?>) t).getTypeParameters()[i], pt.getActualTypeArguments()[i]);
    }
    genericTypes = builder.build();
  }
  // Must be a class.
  if (!(t instanceof Class)) {
    throw new RuntimeException(ObjectInspectorFactory.class.getName() + " internal error: " + t);
  }
  Class<?> c = (Class<?>) t;
  // Java primitive type?
  if (PrimitiveObjectInspectorUtils.isPrimitiveJavaType(c)) {
    return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
        PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaType(c).primitiveCategory);
  }
  // Java primitive wrapper class?
  if (PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(c)) {
    return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
        PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaClass(c).primitiveCategory);
  }
  // Primitive Writable class?
  if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(c)) {
    return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
        PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveWritableClass(c).primitiveCategory);
  }
  // Enum class?
  if (Enum.class.isAssignableFrom(c)) {
    return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
  }
  // Array
  if (c.isArray()) {
    return getStandardListObjectInspector(getReflectionObjectInspector(c.getComponentType()));
  }
  // Must be a struct, because List and Map need to be ParameterizedType.
  Preconditions.checkState(!List.class.isAssignableFrom(c));
  Preconditions.checkState(!Map.class.isAssignableFrom(c));
  Preconditions.checkState(!c.isInterface(), "Cannot inspect an interface.");
  ReflectionStructObjectInspector oi = new ReflectionStructObjectInspector();
  // Put it into the cache BEFORE it is initialized to make sure we can catch recursive types.
  objectInspectorCache.put(t, oi);
  Field[] fields = ObjectInspectorUtils.getDeclaredNonStaticFields(c);
  List<ObjectInspector> structFieldObjectInspectors = new ArrayList<>(fields.length);
  for (Field field : fields) {
    // Skip transient and synthetic fields, such as the implicit "this" reference
    // that a nested class keeps to its enclosing instance.
    if (Modifier.isTransient(field.getModifiers()) || field.isSynthetic()) {
      continue;
    }
    if (!oi.shouldIgnoreField(field.getName())) {
      Type newType = field.getGenericType();
      if (newType instanceof TypeVariable) {
        Preconditions.checkNotNull(genericTypes, "Type was not recognized as a parameterized type.");
        Preconditions.checkNotNull(genericTypes.get(newType),
                                   "Generic type " + newType + " not a parameter of class " + c);
        newType = genericTypes.get(newType);
      }
      structFieldObjectInspectors.add(getReflectionObjectInspector(newType));
    }
  }
  oi.init(c, structFieldObjectInspectors);
  return oi;
}
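To make the PrimitiveCategory lookups above more concrete, here is a minimal, self-contained sketch (not part of the CDAP class; the class and variable names are ours) that mirrors the "Java primitive wrapper class?" branch: it resolves a boxed Java class to its PrimitiveCategory and asks PrimitiveObjectInspectorFactory for the matching Java object inspector, using only the serde2 calls already referenced in the method.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

public class PrimitiveCategoryLookupSketch {
  public static void main(String[] args) {
    // Boxed Integer maps to the INT primitive category, as in the wrapper-class branch above.
    PrimitiveCategory category =
        PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaClass(Integer.class).primitiveCategory;
    // The factory hands back the inspector the reflection code would return for such a field.
    ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(category);
    System.out.println(category + " -> " + oi.getTypeName()); // expected: INT -> int
  }
}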
Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.
In class DruidOutputFormat, the method getHiveRecordWriter:
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
    Progressable progress) throws IOException {
  final String segmentGranularity = tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY) != null
      ? tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY)
      : HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_INDEXING_GRANULARITY);
  final int targetNumShardsPerGranularity =
      Integer.parseUnsignedInt(tableProperties.getProperty(Constants.DRUID_TARGET_SHARDS_PER_GRANULARITY, "0"));
  final int maxPartitionSize = targetNumShardsPerGranularity > 0
      ? -1 : HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_PARTITION_SIZE);
  // If the datasource is in the table properties, it is an INSERT/INSERT OVERWRITE, as the datasource
  // name was already persisted. Otherwise, it is a CT/CTAS and we need to get the name from the
  // job properties that are set by configureOutputJobProperties in the DruidStorageHandler.
  final String dataSource = tableProperties.getProperty(Constants.DRUID_DATA_SOURCE) == null
      ? jc.get(Constants.DRUID_DATA_SOURCE)
      : tableProperties.getProperty(Constants.DRUID_DATA_SOURCE);
  final String segmentDirectory = jc.get(Constants.DRUID_SEGMENT_INTERMEDIATE_DIRECTORY);
  final GranularitySpec granularitySpec = new UniformGranularitySpec(
      Granularity.fromString(segmentGranularity),
      Granularity.fromString(tableProperties.getProperty(Constants.DRUID_QUERY_GRANULARITY) == null
          ? "NONE" : tableProperties.getProperty(Constants.DRUID_QUERY_GRANULARITY)),
      null);
  final String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
  final String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  if (StringUtils.isEmpty(columnNameProperty) || StringUtils.isEmpty(columnTypeProperty)) {
    throw new IllegalStateException(String.format(
        "List of column names [%s] or column types [%s] is/are not present",
        columnNameProperty, columnTypeProperty));
  }
  ArrayList<String> columnNames = new ArrayList<String>();
  for (String name : columnNameProperty.split(",")) {
    columnNames.add(name);
  }
  if (!columnNames.contains(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN)) {
    throw new IllegalStateException("Timestamp column ('" + DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN
        + "') not specified in create table; list of columns is: "
        + tableProperties.getProperty(serdeConstants.LIST_COLUMNS));
  }
  ArrayList<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  final boolean approximationAllowed = HiveConf.getBoolVar(jc, HiveConf.ConfVars.HIVE_DRUID_APPROX_RESULT);
  // By default, all columns that are not metrics or timestamps are treated as dimensions.
  final List<DimensionSchema> dimensions = new ArrayList<>();
  ImmutableList.Builder<AggregatorFactory> aggregatorFactoryBuilder = ImmutableList.builder();
  for (int i = 0; i < columnTypes.size(); i++) {
    final PrimitiveObjectInspector.PrimitiveCategory primitiveCategory =
        ((PrimitiveTypeInfo) columnTypes.get(i)).getPrimitiveCategory();
    AggregatorFactory af;
    switch (primitiveCategory) {
      case BYTE:
      case SHORT:
      case INT:
      case LONG:
        af = new LongSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
        break;
      case FLOAT:
      case DOUBLE:
        af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
        break;
      case DECIMAL:
        if (approximationAllowed) {
          af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
        } else {
          throw new UnsupportedOperationException(String.format(
              "Druid does not support the decimal column type. Either cast column [%s] to double "
                  + "or enable approximate results for Druid by setting property [%s] to true",
              columnNames.get(i), HiveConf.ConfVars.HIVE_DRUID_APPROX_RESULT.varname));
        }
        break;
      case TIMESTAMP:
        // Granularity column
        String tColumnName = columnNames.get(i);
        if (!tColumnName.equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME)) {
          throw new IOException("Dimension " + tColumnName + " does not have STRING type: " + primitiveCategory);
        }
        continue;
      case TIMESTAMPLOCALTZ:
        // Druid timestamp column
        String tLocalTZColumnName = columnNames.get(i);
        if (!tLocalTZColumnName.equals(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN)) {
          throw new IOException("Dimension " + tLocalTZColumnName + " does not have STRING type: " + primitiveCategory);
        }
        continue;
      default:
        // Dimension
        String dColumnName = columnNames.get(i);
        if (PrimitiveObjectInspectorUtils.getPrimitiveGrouping(primitiveCategory) != PrimitiveGrouping.STRING_GROUP
            && primitiveCategory != PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN) {
          throw new IOException("Dimension " + dColumnName + " does not have STRING type: " + primitiveCategory);
        }
        dimensions.add(new StringDimensionSchema(dColumnName));
        continue;
    }
    aggregatorFactoryBuilder.add(af);
  }
  List<AggregatorFactory> aggregatorFactories = aggregatorFactoryBuilder.build();
  final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(
      new TimestampSpec(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
      new DimensionsSpec(dimensions,
          Lists.newArrayList(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, Constants.DRUID_SHARD_KEY_COL_NAME),
          null)));
  Map<String, Object> inputParser = DruidStorageHandlerUtils.JSON_MAPPER.convertValue(inputRowParser, Map.class);
  final DataSchema dataSchema = new DataSchema(
      Preconditions.checkNotNull(dataSource, "Data source name is null"),
      inputParser,
      aggregatorFactories.toArray(new AggregatorFactory[aggregatorFactories.size()]),
      granularitySpec,
      DruidStorageHandlerUtils.JSON_MAPPER);
  final String workingPath = jc.get(Constants.DRUID_JOB_WORKING_DIRECTORY);
  final String version = jc.get(Constants.DRUID_SEGMENT_VERSION);
  String basePersistDirectory = HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_BASE_PERSIST_DIRECTORY);
  if (Strings.isNullOrEmpty(basePersistDirectory)) {
    basePersistDirectory = System.getProperty("java.io.tmpdir");
  }
  Integer maxRowInMemory = HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_ROW_IN_MEMORY);
  IndexSpec indexSpec;
  if ("concise".equals(HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_BITMAP_FACTORY_TYPE))) {
    indexSpec = new IndexSpec(new ConciseBitmapSerdeFactory(), null, null, null);
  } else {
    indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
  }
  RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(maxRowInMemory, null, null,
      new File(basePersistDirectory, dataSource), new CustomVersioningPolicy(version), null, null, null,
      indexSpec, true, 0, 0, true, null, 0L);
  LOG.debug(String.format("Running with data schema [%s]", dataSchema));
  return new DruidRecordWriter(dataSchema, realtimeTuningConfig,
      DruidStorageHandlerUtils.createSegmentPusherForDirectory(segmentDirectory, jc),
      maxPartitionSize, new Path(workingPath, SEGMENTS_DESCRIPTOR_DIR_NAME), finalOutPath.getFileSystem(jc));
}
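As a rough illustration of how the switch above partitions Hive columns by PrimitiveCategory, the following standalone sketch (not from the Hive code base; the helper name and the simplified handling of DECIMAL and the two timestamp columns are ours) classifies a category as a Druid metric, a time column, or a string dimension using only Hive serde2 types.

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;

public class DruidColumnKindSketch {
  // Simplified mirror of the switch above: integral and floating types feed sum aggregators,
  // timestamp types are handled as time columns, and anything string-like (or boolean)
  // becomes a string dimension. DECIMAL's approximation gate is omitted here.
  static String classify(PrimitiveCategory category) {
    switch (category) {
      case BYTE:
      case SHORT:
      case INT:
      case LONG:
      case FLOAT:
      case DOUBLE:
        return "metric";
      case TIMESTAMP:
      case TIMESTAMPLOCALTZ:
        return "time column";
      default:
        PrimitiveGrouping grouping = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(category);
        boolean stringLike = grouping == PrimitiveGrouping.STRING_GROUP
            || category == PrimitiveCategory.BOOLEAN;
        return stringLike ? "string dimension" : "unsupported";
    }
  }

  public static void main(String[] args) {
    System.out.println(classify(PrimitiveCategory.LONG));    // metric
    System.out.println(classify(PrimitiveCategory.VARCHAR)); // string dimension
    System.out.println(classify(PrimitiveCategory.BINARY));  // unsupported
  }
}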
Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.
In class HiveHBaseInputFormatUtil, the method getConstantVal:
static byte[] getConstantVal(Object writable, PrimitiveObjectInspector poi, boolean isKeyBinary) throws IOException {
  if (!isKeyBinary) {
    // The key is stored in text format, so get the bytes of the constant in text format as well.
    byte[] startRow;
    ByteStream.Output serializeStream = new ByteStream.Output();
    LazyUtils.writePrimitiveUTF8(serializeStream, writable, poi, false, (byte) 0, null);
    startRow = new byte[serializeStream.getLength()];
    System.arraycopy(serializeStream.getData(), 0, startRow, 0, serializeStream.getLength());
    return startRow;
  }
  PrimitiveCategory pc = poi.getPrimitiveCategory();
  switch (pc) {
    case INT:
      return Bytes.toBytes(((IntWritable) writable).get());
    case BOOLEAN:
      return Bytes.toBytes(((BooleanWritable) writable).get());
    case LONG:
      return Bytes.toBytes(((LongWritable) writable).get());
    case FLOAT:
      return Bytes.toBytes(((FloatWritable) writable).get());
    case DOUBLE:
      return Bytes.toBytes(((DoubleWritable) writable).get());
    case SHORT:
      return Bytes.toBytes(((ShortWritable) writable).get());
    case STRING:
      return Bytes.toBytes(((Text) writable).toString());
    case BYTE:
      return Bytes.toBytes(((ByteWritable) writable).get());
    default:
      throw new IOException("Type not supported " + pc);
  }
}
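The split above between binary-keyed and text-keyed tables can be seen in a small standalone sketch (not from the Hive code base; it approximates the text path, which in the real method goes through LazyUtils.writePrimitiveUTF8): the same integer constant yields a 4-byte big-endian key for a binary table and its decimal string for a text table.

import org.apache.hadoop.hbase.util.Bytes;

public class HBaseKeyEncodingSketch {
  public static void main(String[] args) {
    int constant = 42;
    // Binary-keyed table: the INT case above uses the raw 4-byte big-endian encoding.
    byte[] binaryKey = Bytes.toBytes(constant);
    // Text-keyed table: the constant is serialized as its UTF-8 string form instead.
    byte[] textKey = Bytes.toBytes(Integer.toString(constant));
    System.out.println("binary: " + Bytes.toStringBinary(binaryKey)); // \x00\x00\x00*
    System.out.println("text:   " + Bytes.toStringBinary(textKey));   // 42
  }
}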
Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.
In class VectorAssignRow, the method initTargetEntry:
/*
 * Initialize one column's target-related arrays.
 */
private void initTargetEntry(int logicalColumnIndex, int projectionColumnNum, TypeInfo typeInfo) {
  isConvert[logicalColumnIndex] = false;
  projectionColumnNums[logicalColumnIndex] = projectionColumnNum;
  targetTypeInfos[logicalColumnIndex] = typeInfo;
  if (typeInfo.getCategory() == Category.PRIMITIVE) {
    final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
    final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
    switch (primitiveCategory) {
      case CHAR:
        maxLengths[logicalColumnIndex] = ((CharTypeInfo) primitiveTypeInfo).getLength();
        break;
      case VARCHAR:
        maxLengths[logicalColumnIndex] = ((VarcharTypeInfo) primitiveTypeInfo).getLength();
        break;
      default:
        // No additional data-type-specific setting.
        break;
    }
  }
}
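Only CHAR and VARCHAR carry a declared maximum length, which is why the switch above records maxLengths for just those two categories. The following standalone sketch (not part of VectorAssignRow; the helper name is ours) extracts that length from a type string in the same way:

import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;

public class MaxLengthSketch {
  // Returns the declared maximum length for CHAR/VARCHAR, or -1 for other primitive types.
  static int maxLength(PrimitiveTypeInfo typeInfo) {
    switch (typeInfo.getPrimitiveCategory()) {
      case CHAR:
        return ((CharTypeInfo) typeInfo).getLength();
      case VARCHAR:
        return ((VarcharTypeInfo) typeInfo).getLength();
      default:
        return -1;
    }
  }

  public static void main(String[] args) {
    System.out.println(maxLength((PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("varchar(25)"))); // 25
    System.out.println(maxLength((PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("char(10)")));    // 10
    System.out.println(maxLength((PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("int")));         // -1
  }
}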
Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.
In class VectorizationContext, the method getVectorTypeScalarValue:
private Object getVectorTypeScalarValue(ExprNodeConstantDesc constDesc) throws HiveException {
  TypeInfo typeInfo = constDesc.getTypeInfo();
  PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
  Object scalarValue = getScalarValue(constDesc);
  switch (primitiveCategory) {
    case DATE:
      return new Long(DateWritable.dateToDays((Date) scalarValue));
    case INTERVAL_YEAR_MONTH:
      return ((HiveIntervalYearMonth) scalarValue).getTotalMonths();
    default:
      return scalarValue;
  }
}
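For context, the two special cases above convert the constant into the numeric form that the vectorized row batches expect. Here is a standalone sketch of the same conversions (not from VectorizationContext; it assumes the older java.sql.Date-based DateWritable used in the method above):

import java.sql.Date;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.serde2.io.DateWritable;

public class ScalarConversionSketch {
  public static void main(String[] args) {
    // DATE constants become a days-since-epoch count, as in the DATE case above.
    int epochDays = DateWritable.dateToDays(Date.valueOf("1970-01-11"));
    System.out.println(epochDays); // 10

    // INTERVAL_YEAR_MONTH constants become a total month count.
    HiveIntervalYearMonth interval = new HiveIntervalYearMonth(2, 3); // 2 years, 3 months
    System.out.println(interval.getTotalMonths()); // 27
  }
}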