Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
Class VectorExtractRow, method init.
/*
* Initialize using a StructObjectInspector and a column projection list.
*/
public void init(StructObjectInspector structObjectInspector, List<Integer> projectedColumns) throws HiveException {
List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
final int count = fields.size();
allocateArrays(count);
for (int i = 0; i < count; i++) {
int projectionColumnNum = projectedColumns.get(i);
StructField field = fields.get(i);
ObjectInspector fieldInspector = field.getFieldObjectInspector();
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldInspector.getTypeName());
initEntry(i, projectionColumnNum, typeInfo);
}
}
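For orientation, a minimal sketch (not from the Hive codebase) of driving this init overload: it builds a standard struct object inspector for a hypothetical two-column schema and maps each struct field onto an arbitrary batch column through the projection list. The field names and the projected positions 3 and 7 are assumptions for illustration.
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class VectorExtractRowInitSketch {
  public static void main(String[] args) throws HiveException {
    // A two-column row schema (id bigint, name string) using writable primitive inspectors.
    List<String> names = Arrays.asList("id", "name");
    List<ObjectInspector> ois = Arrays.<ObjectInspector>asList(
        PrimitiveObjectInspectorFactory.writableLongObjectInspector,
        PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    StandardStructObjectInspector rowOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);

    // Projection list: struct field i is read from batch column projectedColumns.get(i).
    // Here field 0 ("id") is read from batch column 3 and field 1 ("name") from column 7
    // (hypothetical positions).
    VectorExtractRow extractRow = new VectorExtractRow();
    extractRow.init(rowOI, Arrays.asList(3, 7));
  }
}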
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
Class VectorExtractRow, method extractRowColumn.
public Object extractRowColumn(ColumnVector colVector, TypeInfo typeInfo, ObjectInspector objectInspector, int batchIndex) {
if (colVector == null) {
// The planner will not include unneeded columns for reading, but other parts of execution may ask for them.
return null;
}
final int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
if (!colVector.noNulls && colVector.isNull[adjustedIndex]) {
return null;
}
final Category category = typeInfo.getCategory();
switch(category) {
case PRIMITIVE:
{
final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
final Writable primitiveWritable = VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory);
switch(primitiveCategory) {
case VOID:
return null;
case BOOLEAN:
((BooleanWritable) primitiveWritable).set(((LongColumnVector) colVector).vector[adjustedIndex] != 0);
return primitiveWritable;
case BYTE:
((ByteWritable) primitiveWritable).set((byte) ((LongColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case SHORT:
((ShortWritable) primitiveWritable).set((short) ((LongColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case INT:
((IntWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case LONG:
((LongWritable) primitiveWritable).set(((LongColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case TIMESTAMP:
((TimestampWritable) primitiveWritable).set(((TimestampColumnVector) colVector).asScratchTimestamp(adjustedIndex));
return primitiveWritable;
case DATE:
((DateWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case FLOAT:
((FloatWritable) primitiveWritable).set((float) ((DoubleColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case DOUBLE:
((DoubleWritable) primitiveWritable).set(((DoubleColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case BINARY:
{
final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
if (bytesColVector.isRepeating) {
if (!bytesColVector.isNull[0] && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
} else {
if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
}
BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
bytesWritable.set(bytes, start, length);
return primitiveWritable;
}
case STRING:
{
final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
if (bytesColVector.isRepeating) {
if (!bytesColVector.isNull[0] && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
} else {
if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
}
// Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
((Text) primitiveWritable).set(bytes, start, length);
return primitiveWritable;
}
case VARCHAR:
{
final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
if (bytesColVector.isRepeating) {
if (!bytesColVector.isNull[0] && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
} else {
if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
}
final int adjustedLength = StringExpr.truncate(bytes, start, length, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
final HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable;
hiveVarcharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
return primitiveWritable;
}
case CHAR:
{
final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
if (bytesColVector.isRepeating) {
if (!bytesColVector.isNull[0] && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
} else {
if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex);
}
}
final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, ((CharTypeInfo) primitiveTypeInfo).getLength());
final HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable;
hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), ((CharTypeInfo) primitiveTypeInfo).getLength());
return primitiveWritable;
}
case DECIMAL:
if (colVector instanceof Decimal64ColumnVector) {
Decimal64ColumnVector dec64ColVector = (Decimal64ColumnVector) colVector;
((HiveDecimalWritable) primitiveWritable).deserialize64(dec64ColVector.vector[adjustedIndex], dec64ColVector.scale);
} else {
// The HiveDecimalWritable set method will quickly copy the deserialized decimal writable fields.
((HiveDecimalWritable) primitiveWritable).set(((DecimalColumnVector) colVector).vector[adjustedIndex]);
}
return primitiveWritable;
case INTERVAL_YEAR_MONTH:
((HiveIntervalYearMonthWritable) primitiveWritable).set((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
return primitiveWritable;
case INTERVAL_DAY_TIME:
((HiveIntervalDayTimeWritable) primitiveWritable).set(((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedIndex));
return primitiveWritable;
default:
throw new RuntimeException("Primitive category " + primitiveCategory.name() + " not supported");
}
}
case LIST:
{
final ListColumnVector listColumnVector = (ListColumnVector) colVector;
final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
final ListObjectInspector listObjectInspector = (ListObjectInspector) objectInspector;
final int offset = (int) listColumnVector.offsets[adjustedIndex];
final int size = (int) listColumnVector.lengths[adjustedIndex];
final List<Object> list = new ArrayList<Object>();
for (int i = 0; i < size; i++) {
list.add(extractRowColumn(listColumnVector.child, listTypeInfo.getListElementTypeInfo(), listObjectInspector.getListElementObjectInspector(), offset + i));
}
return list;
}
case MAP:
{
final MapColumnVector mapColumnVector = (MapColumnVector) colVector;
final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
final MapObjectInspector mapObjectInspector = (MapObjectInspector) objectInspector;
final int offset = (int) mapColumnVector.offsets[adjustedIndex];
final int size = (int) mapColumnVector.lengths[adjustedIndex];
final Map<Object, Object> map = new HashMap<Object, Object>();
for (int i = 0; i < size; i++) {
final Object key = extractRowColumn(mapColumnVector.keys, mapTypeInfo.getMapKeyTypeInfo(), mapObjectInspector.getMapKeyObjectInspector(), offset + i);
final Object value = extractRowColumn(mapColumnVector.values, mapTypeInfo.getMapValueTypeInfo(), mapObjectInspector.getMapValueObjectInspector(), offset + i);
map.put(key, value);
}
return map;
}
case STRUCT:
{
final StructColumnVector structColumnVector = (StructColumnVector) colVector;
final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
final StandardStructObjectInspector structInspector = (StandardStructObjectInspector) objectInspector;
final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
final int size = fieldTypeInfos.size();
final List<? extends StructField> structFields = structInspector.getAllStructFieldRefs();
final Object struct = structInspector.create();
for (int i = 0; i < size; i++) {
final StructField structField = structFields.get(i);
final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i);
final Object value = extractRowColumn(structColumnVector.fields[i], fieldTypeInfo, structField.getFieldObjectInspector(), adjustedIndex);
structInspector.setStructFieldData(struct, structField, value);
}
return struct;
}
case UNION:
{
final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
final UnionObjectInspector unionInspector = (UnionObjectInspector) objectInspector;
final List<ObjectInspector> unionInspectors = unionInspector.getObjectInspectors();
final UnionColumnVector unionColumnVector = (UnionColumnVector) colVector;
final byte tag = (byte) unionColumnVector.tags[adjustedIndex];
final Object object = extractRowColumn(unionColumnVector.fields[tag], objectTypeInfos.get(tag), unionInspectors.get(tag), adjustedIndex);
final StandardUnion standardUnion = new StandardUnion();
standardUnion.setTag(tag);
standardUnion.setObject(object);
return standardUnion;
}
default:
throw new RuntimeException("Category " + category.name() + " not supported");
}
}
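A hedged sketch of exercising the primitive path of extractRowColumn in isolation, using a hand-filled LongColumnVector; the values and batch index are made up, and real callers normally go through the higher-level row-extraction entry points rather than calling this method directly.
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.LongWritable;

public class ExtractRowColumnSketch {
  public static void main(String[] args) {
    // Hand-built bigint column with three rows; the values are arbitrary.
    LongColumnVector longCol = new LongColumnVector();
    longCol.vector[0] = 10L;
    longCol.vector[1] = 20L;
    longCol.vector[2] = 30L;
    longCol.noNulls = true;
    longCol.isRepeating = false;

    VectorExtractRow extractRow = new VectorExtractRow();
    // Pull row 1 of the column out as a LongWritable via the LONG primitive branch.
    Object value = extractRow.extractRowColumn(
        longCol,
        TypeInfoFactory.longTypeInfo,
        PrimitiveObjectInspectorFactory.writableLongObjectInspector,
        1);
    System.out.println(((LongWritable) value).get());  // expected: 20
  }
}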
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
Class VectorAssignRow, method init.
/*
* Initialize using a StructObjectInspector.
* No projection -- the column range 0 .. fields.size()-1
*/
public void init(StructObjectInspector structObjectInspector) throws HiveException {
final List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
final int count = fields.size();
allocateArrays(count);
for (int i = 0; i < count; i++) {
final StructField field = fields.get(i);
final ObjectInspector fieldInspector = field.getFieldObjectInspector();
final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldInspector.getTypeName());
initTargetEntry(i, i, typeInfo);
}
}
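For symmetry with VectorExtractRow, a minimal sketch (assumed schema, not Hive's own caller) of initializing VectorAssignRow without a projection: struct field i then corresponds to batch column i.
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.vector.VectorAssignRow;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class VectorAssignRowInitSketch {
  public static void main(String[] args) throws HiveException {
    // Row schema (amount double, label string), mapping to batch columns 0 and 1.
    List<String> names = Arrays.asList("amount", "label");
    List<ObjectInspector> ois = Arrays.<ObjectInspector>asList(
        PrimitiveObjectInspectorFactory.writableDoubleObjectInspector,
        PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    StandardStructObjectInspector rowOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);

    VectorAssignRow assignRow = new VectorAssignRow();
    assignRow.init(rowOI);
    // After init, the per-column assign methods can be used to write
    // writable or standard objects into a VectorizedRowBatch's column vectors.
  }
}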
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
Class Utilities, method constructVectorizedReduceRowOI.
/**
* Create row key and value object inspectors for reduce vectorization.
* The row object inspector used by ReduceWork needs to be a **standard**
* struct object inspector, not just any struct object inspector.
* @param keyInspector
* @param valueInspector
* @return the combined standard struct row object inspector
* @throws HiveException
*/
public static StandardStructObjectInspector constructVectorizedReduceRowOI(StructObjectInspector keyInspector, StructObjectInspector valueInspector) throws HiveException {
ArrayList<String> colNames = new ArrayList<String>();
ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
List<? extends StructField> fields = keyInspector.getAllStructFieldRefs();
for (StructField field : fields) {
colNames.add(Utilities.ReduceField.KEY.toString() + '.' + field.getFieldName());
ois.add(field.getFieldObjectInspector());
}
fields = valueInspector.getAllStructFieldRefs();
for (StructField field : fields) {
colNames.add(Utilities.ReduceField.VALUE.toString() + '.' + field.getFieldName());
ois.add(field.getFieldObjectInspector());
}
StandardStructObjectInspector rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois);
return rowObjectInspector;
}
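A hedged sketch of what this helper produces: given key and value struct inspectors whose fields are named _col0 (an illustrative convention), the combined standard struct inspector prefixes each field name with KEY or VALUE.
import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ReduceRowOISketch {
  public static void main(String[] args) throws HiveException {
    // Key side: a single bigint grouping key named _col0 (illustrative).
    StructObjectInspector keyOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("_col0"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.writableLongObjectInspector));
    // Value side: a single string column named _col0 (illustrative).
    StructObjectInspector valueOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("_col0"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.writableStringObjectInspector));

    StandardStructObjectInspector rowOI =
        Utilities.constructVectorizedReduceRowOI(keyOI, valueOI);
    for (StructField f : rowOI.getAllStructFieldRefs()) {
      // Prints names such as KEY._col0 and VALUE._col0
      // (the standard inspector may lower-case field names).
      System.out.println(f.getFieldName());
    }
  }
}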
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
Class GroupByOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
numRowsInput = 0;
numRowsHashTbl = 0;
heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT);
countAfterReport = 0;
ObjectInspector rowInspector = inputObjInspectors[0];
// init keyFields
int numKeys = conf.getKeys().size();
keyFields = new ExprNodeEvaluator[numKeys];
keyObjectInspectors = new ObjectInspector[numKeys];
currentKeyObjectInspectors = new ObjectInspector[numKeys];
for (int i = 0; i < numKeys; i++) {
keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i), hconf);
keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(keyObjectInspectors[i], ObjectInspectorCopyOption.WRITABLE);
}
// Initialize the constants for the grouping sets, so that they can be re-used for
// each row
groupingSetsPresent = conf.isGroupingSetsPresent();
if (groupingSetsPresent) {
groupingSets = conf.getListGroupingSets();
groupingSetsPosition = conf.getGroupingSetPosition();
newKeysGroupingSets = new LongWritable[groupingSets.size()];
groupingSetsBitSet = new FastBitSet[groupingSets.size()];
int pos = 0;
for (Long groupingSet : groupingSets) {
// Create the mapping corresponding to the grouping set
newKeysGroupingSets[pos] = new LongWritable(groupingSet);
groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet, groupingSetsPosition);
pos++;
}
}
// initialize unionExpr for reduce-side
// reduce KEY has union field as the last field if there are distinct
// aggregates in group-by.
List<? extends StructField> sfs = ((StructObjectInspector) rowInspector).getAllStructFieldRefs();
if (sfs.size() > 0) {
StructField keyField = sfs.get(0);
if (keyField.getFieldName().toUpperCase().equals(Utilities.ReduceField.KEY.name())) {
ObjectInspector keyObjInspector = keyField.getFieldObjectInspector();
if (keyObjInspector instanceof StructObjectInspector) {
List<? extends StructField> keysfs = ((StructObjectInspector) keyObjInspector).getAllStructFieldRefs();
if (keysfs.size() > 0) {
// the last field is the union field, if any
StructField sf = keysfs.get(keysfs.size() - 1);
if (sf.getFieldObjectInspector().getCategory().equals(ObjectInspector.Category.UNION)) {
unionExprEval = ExprNodeEvaluatorFactory.get(new ExprNodeColumnDesc(TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), keyField.getFieldName() + "." + sf.getFieldName(), null, false), hconf);
unionExprEval.initialize(rowInspector);
}
}
}
}
}
// init aggregationParameterFields
ArrayList<AggregationDesc> aggrs = conf.getAggregators();
aggregationParameterFields = new ExprNodeEvaluator[aggrs.size()][];
aggregationParameterObjectInspectors = new ObjectInspector[aggrs.size()][];
aggregationParameterStandardObjectInspectors = new ObjectInspector[aggrs.size()][];
aggregationParameterObjects = new Object[aggrs.size()][];
aggregationIsDistinct = new boolean[aggrs.size()];
for (int i = 0; i < aggrs.size(); i++) {
AggregationDesc aggr = aggrs.get(i);
ArrayList<ExprNodeDesc> parameters = aggr.getParameters();
aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()];
aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters.size()];
aggregationParameterStandardObjectInspectors[i] = new ObjectInspector[parameters.size()];
aggregationParameterObjects[i] = new Object[parameters.size()];
for (int j = 0; j < parameters.size(); j++) {
aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory.get(parameters.get(j), hconf);
aggregationParameterObjectInspectors[i][j] = aggregationParameterFields[i][j].initialize(rowInspector);
if (unionExprEval != null) {
String[] names = parameters.get(j).getExprString().split("\\.");
// parameters of the form : KEY.colx:t.coly
if (Utilities.ReduceField.KEY.name().equals(names[0]) && names.length > 2) {
String name = names[names.length - 2];
int tag = Integer.parseInt(name.split("\\:")[1]);
if (aggr.getDistinct()) {
// is distinct
Set<Integer> set = distinctKeyAggrs.get(tag);
if (null == set) {
set = new HashSet<Integer>();
distinctKeyAggrs.put(tag, set);
}
if (!set.contains(i)) {
set.add(i);
}
} else {
Set<Integer> set = nonDistinctKeyAggrs.get(tag);
if (null == set) {
set = new HashSet<Integer>();
nonDistinctKeyAggrs.put(tag, set);
}
if (!set.contains(i)) {
set.add(i);
}
}
} else {
// will be KEY._COLx or VALUE._COLx
if (!nonDistinctAggrs.contains(i)) {
nonDistinctAggrs.add(i);
}
}
} else {
if (aggr.getDistinct()) {
aggregationIsDistinct[i] = true;
}
}
aggregationParameterStandardObjectInspectors[i][j] = ObjectInspectorUtils.getStandardObjectInspector(aggregationParameterObjectInspectors[i][j], ObjectInspectorCopyOption.WRITABLE);
aggregationParameterObjects[i][j] = null;
}
if (parameters.size() == 0) {
// for ex: count(*)
if (!nonDistinctAggrs.contains(i)) {
nonDistinctAggrs.add(i);
}
}
}
// init aggregationClasses
aggregationEvaluators = new GenericUDAFEvaluator[conf.getAggregators().size()];
for (int i = 0; i < aggregationEvaluators.length; i++) {
AggregationDesc agg = conf.getAggregators().get(i);
aggregationEvaluators[i] = agg.getGenericUDAFEvaluator();
}
MapredContext context = MapredContext.get();
if (context != null) {
for (GenericUDAFEvaluator genericUDAFEvaluator : aggregationEvaluators) {
context.setup(genericUDAFEvaluator);
}
}
// grouping id should be pruned, which is the last of key columns
// see ColumnPrunerGroupByProc
outputKeyLength = conf.pruneGroupingSetId() ? keyFields.length - 1 : keyFields.length;
// init objectInspectors
ObjectInspector[] objectInspectors = new ObjectInspector[outputKeyLength + aggregationEvaluators.length];
for (int i = 0; i < outputKeyLength; i++) {
objectInspectors[i] = currentKeyObjectInspectors[i];
}
for (int i = 0; i < aggregationEvaluators.length; i++) {
objectInspectors[outputKeyLength + i] = aggregationEvaluators[i].init(conf.getAggregators().get(i).getMode(), aggregationParameterObjectInspectors[i]);
}
aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][];
if ((conf.getMode() != GroupByDesc.Mode.HASH || conf.getBucketGroup()) && (!groupingSetsPresent)) {
aggregations = newAggregations();
hashAggr = false;
} else {
hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>(256);
aggregations = newAggregations();
hashAggr = true;
keyPositionsSize = new ArrayList<Integer>();
aggrPositions = new List[aggregations.length];
groupbyMapAggrInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
// compare every groupbyMapAggrInterval rows
numRowsCompareHashAggr = groupbyMapAggrInterval;
minReductionHashAggr = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
}
List<String> fieldNames = new ArrayList<String>(conf.getOutputColumnNames());
outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, Arrays.asList(objectInspectors));
KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors);
newKeys = keyWrapperFactory.getKeyWrapper();
isTez = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
isLlap = LlapDaemonInfo.INSTANCE.isLlap();
numExecutors = isLlap ? LlapDaemonInfo.INSTANCE.getNumExecutors() : 1;
firstRow = true;
// Estimate the number of hash table entries; since the size of an entry is not known, estimate it based on the number of entries.
if (hashAggr) {
computeMaxEntriesHashAggr();
}
memoryMXBean = ManagementFactory.getMemoryMXBean();
maxMemory = isTez ? getConf().getMaxMemoryAvailable() : memoryMXBean.getHeapMemoryUsage().getMax();
memoryThreshold = this.getConf().getMemoryThreshold();
LOG.info("isTez: {} isLlap: {} numExecutors: {} maxMemory: {}", isTez, isLlap, numExecutors, maxMemory);
}
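The reduce-side probe above, which looks for a first struct field named KEY whose last sub-field is a UNION (the shape produced when distinct aggregates are present), can be shown in isolation. The helper below is hypothetical and only walks the object-inspector tree; it is not part of GroupByOperator.
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class ReduceKeyUnionProbe {
  /**
   * Returns true if the row's first field is the reduce KEY struct and its last
   * sub-field is a UNION (i.e. distinct aggregates are present in the reduce key).
   */
  public static boolean hasDistinctUnionKey(StructObjectInspector rowInspector) {
    List<? extends StructField> fields = rowInspector.getAllStructFieldRefs();
    if (fields.isEmpty()) {
      return false;
    }
    StructField first = fields.get(0);
    if (!"KEY".equalsIgnoreCase(first.getFieldName())) {
      return false;
    }
    ObjectInspector keyOI = first.getFieldObjectInspector();
    if (!(keyOI instanceof StructObjectInspector)) {
      return false;
    }
    List<? extends StructField> keyFields =
        ((StructObjectInspector) keyOI).getAllStructFieldRefs();
    if (keyFields.isEmpty()) {
      return false;
    }
    // The union field, if any, is the last field of the KEY struct.
    StructField last = keyFields.get(keyFields.size() - 1);
    return last.getFieldObjectInspector().getCategory() == ObjectInspector.Category.UNION;
  }
}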