Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
From the class FetchOperator, the method createPartValue:
private Object[] createPartValue(PartitionDesc partDesc, StructObjectInspector partOI) {
  Map<String, String> partSpec = partDesc.getPartSpec();
  List<? extends StructField> fields = partOI.getAllStructFieldRefs();
  Object[] partValues = new Object[fields.size()];
  for (int i = 0; i < partValues.length; i++) {
    StructField field = fields.get(i);
    String value = partSpec.get(field.getFieldName());
    ObjectInspector oi = field.getFieldObjectInspector();
    partValues[i] = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi).convert(value);
  }
  return partValues;
}
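The conversion in createPartValue relies on ObjectInspectorConverters to turn each raw partition-spec string into an object of the field's declared type. Below is a minimal, self-contained sketch of that pattern; the class name PartValueConversionSketch and the int partition column are illustrative assumptions, not part of FetchOperator.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class PartValueConversionSketch {
  public static void main(String[] args) {
    // Hypothetical target type: an int partition column such as "year".
    ObjectInspector targetOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    // Convert the raw string from the partition spec into the typed value,
    // the same string-to-declared-type conversion createPartValue performs per column.
    Object typed = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, targetOI).convert("2024");
    System.out.println(typed.getClass().getSimpleName() + ": " + typed); // Integer: 2024
  }
}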
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
From the class GroupByOperator, the method initializeOp:
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  numRowsInput = 0;
  numRowsHashTbl = 0;
  heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT);
  countAfterReport = 0;
  groupingSetsPresent = conf.isGroupingSetsPresent();
  ObjectInspector rowInspector = inputObjInspectors[0];
  // init keyFields
  int numKeys = conf.getKeys().size();
  keyFields = new ExprNodeEvaluator[numKeys];
  keyObjectInspectors = new ObjectInspector[numKeys];
  currentKeyObjectInspectors = new ObjectInspector[numKeys];
  for (int i = 0; i < numKeys; i++) {
    keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i), hconf);
    keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
    currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(
        keyObjectInspectors[i], ObjectInspectorCopyOption.WRITABLE);
  }
  // Initialize the constants for the grouping sets, so that they can be reused for each row
  if (groupingSetsPresent) {
    groupingSets = conf.getListGroupingSets();
    groupingSetsPosition = conf.getGroupingSetPosition();
    newKeysGroupingSets = new IntWritable[groupingSets.size()];
    groupingSetsBitSet = new FastBitSet[groupingSets.size()];
    int pos = 0;
    for (Integer groupingSet : groupingSets) {
      // Create the mapping corresponding to the grouping set
      newKeysGroupingSets[pos] = new IntWritable(groupingSet);
      groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet, groupingSetsPosition);
      pos++;
    }
  }
  // initialize unionExpr for reduce-side
  // reduce KEY has union field as the last field if there are distinct
  // aggregates in group-by.
  List<? extends StructField> sfs = ((StructObjectInspector) rowInspector).getAllStructFieldRefs();
  if (sfs.size() > 0) {
    StructField keyField = sfs.get(0);
    if (keyField.getFieldName().toUpperCase().equals(Utilities.ReduceField.KEY.name())) {
      ObjectInspector keyObjInspector = keyField.getFieldObjectInspector();
      if (keyObjInspector instanceof StructObjectInspector) {
        List<? extends StructField> keysfs = ((StructObjectInspector) keyObjInspector).getAllStructFieldRefs();
        if (keysfs.size() > 0) {
          // the last field is the union field, if any
          StructField sf = keysfs.get(keysfs.size() - 1);
          if (sf.getFieldObjectInspector().getCategory().equals(ObjectInspector.Category.UNION)) {
            unionExprEval = ExprNodeEvaluatorFactory.get(new ExprNodeColumnDesc(
                TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()),
                keyField.getFieldName() + "." + sf.getFieldName(), null, false), hconf);
            unionExprEval.initialize(rowInspector);
          }
        }
      }
    }
  }
  // init aggregationParameterFields
  ArrayList<AggregationDesc> aggrs = conf.getAggregators();
  aggregationParameterFields = new ExprNodeEvaluator[aggrs.size()][];
  aggregationParameterObjectInspectors = new ObjectInspector[aggrs.size()][];
  aggregationParameterStandardObjectInspectors = new ObjectInspector[aggrs.size()][];
  aggregationParameterObjects = new Object[aggrs.size()][];
  aggregationIsDistinct = new boolean[aggrs.size()];
  for (int i = 0; i < aggrs.size(); i++) {
    AggregationDesc aggr = aggrs.get(i);
    ArrayList<ExprNodeDesc> parameters = aggr.getParameters();
    aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()];
    aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters.size()];
    aggregationParameterStandardObjectInspectors[i] = new ObjectInspector[parameters.size()];
    aggregationParameterObjects[i] = new Object[parameters.size()];
    for (int j = 0; j < parameters.size(); j++) {
      aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory.get(parameters.get(j), hconf);
      aggregationParameterObjectInspectors[i][j] = aggregationParameterFields[i][j].initialize(rowInspector);
      if (unionExprEval != null) {
        String[] names = parameters.get(j).getExprString().split("\\.");
        // parameters of the form : KEY.colx:t.coly
        if (Utilities.ReduceField.KEY.name().equals(names[0]) && names.length > 2) {
          String name = names[names.length - 2];
          int tag = Integer.parseInt(name.split("\\:")[1]);
          if (aggr.getDistinct()) {
            // is distinct
            Set<Integer> set = distinctKeyAggrs.get(tag);
            if (null == set) {
              set = new HashSet<Integer>();
              distinctKeyAggrs.put(tag, set);
            }
            if (!set.contains(i)) {
              set.add(i);
            }
          } else {
            Set<Integer> set = nonDistinctKeyAggrs.get(tag);
            if (null == set) {
              set = new HashSet<Integer>();
              nonDistinctKeyAggrs.put(tag, set);
            }
            if (!set.contains(i)) {
              set.add(i);
            }
          }
        } else {
          // will be KEY._COLx or VALUE._COLx
          if (!nonDistinctAggrs.contains(i)) {
            nonDistinctAggrs.add(i);
          }
        }
      } else {
        if (aggr.getDistinct()) {
          aggregationIsDistinct[i] = true;
        }
      }
      aggregationParameterStandardObjectInspectors[i][j] = ObjectInspectorUtils.getStandardObjectInspector(
          aggregationParameterObjectInspectors[i][j], ObjectInspectorCopyOption.WRITABLE);
      aggregationParameterObjects[i][j] = null;
    }
    if (parameters.size() == 0) {
      // for ex: count(*)
      if (!nonDistinctAggrs.contains(i)) {
        nonDistinctAggrs.add(i);
      }
    }
  }
  // init aggregationClasses
  aggregationEvaluators = new GenericUDAFEvaluator[conf.getAggregators().size()];
  for (int i = 0; i < aggregationEvaluators.length; i++) {
    AggregationDesc agg = conf.getAggregators().get(i);
    aggregationEvaluators[i] = agg.getGenericUDAFEvaluator();
  }
  MapredContext context = MapredContext.get();
  if (context != null) {
    for (GenericUDAFEvaluator genericUDAFEvaluator : aggregationEvaluators) {
      context.setup(genericUDAFEvaluator);
    }
  }
  // grouping id should be pruned, which is the last of key columns
  // see ColumnPrunerGroupByProc
  outputKeyLength = conf.pruneGroupingSetId() ? keyFields.length - 1 : keyFields.length;
  // init objectInspectors
  ObjectInspector[] objectInspectors = new ObjectInspector[outputKeyLength + aggregationEvaluators.length];
  for (int i = 0; i < outputKeyLength; i++) {
    objectInspectors[i] = currentKeyObjectInspectors[i];
  }
  for (int i = 0; i < aggregationEvaluators.length; i++) {
    objectInspectors[outputKeyLength + i] = aggregationEvaluators[i].init(
        conf.getAggregators().get(i).getMode(), aggregationParameterObjectInspectors[i]);
  }
  aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][];
  if ((conf.getMode() != GroupByDesc.Mode.HASH || conf.getBucketGroup()) && (!groupingSetsPresent)) {
    aggregations = newAggregations();
    hashAggr = false;
  } else {
    hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>(256);
    aggregations = newAggregations();
    hashAggr = true;
    keyPositionsSize = new ArrayList<Integer>();
    aggrPositions = new List[aggregations.length];
    groupbyMapAggrInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
    // compare every groupbyMapAggrInterval rows
    numRowsCompareHashAggr = groupbyMapAggrInterval;
    minReductionHashAggr = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
  }
  List<String> fieldNames = new ArrayList<String>(conf.getOutputColumnNames());
  outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, Arrays.asList(objectInspectors));
  KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors);
  newKeys = keyWrapperFactory.getKeyWrapper();
  isTez = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
  isLlap = isTez && HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_MODE).equals("llap");
  numExecutors = isLlap ? HiveConf.getIntVar(hconf, HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS) : 1;
  firstRow = true;
  // the size of a hash table entry is not known in advance, so estimate the maximum
  // number of entries that fit in memory (see computeMaxEntriesHashAggr)
  if (hashAggr) {
    computeMaxEntriesHashAggr();
  }
  memoryMXBean = ManagementFactory.getMemoryMXBean();
  maxMemory = isTez ? getConf().getMaxMemoryAvailable() : memoryMXBean.getHeapMemoryUsage().getMax();
  memoryThreshold = this.getConf().getMemoryThreshold();
  LOG.info("isTez: {} isLlap: {} numExecutors: {} maxMemory: {}", isTez, isLlap, numExecutors, maxMemory);
}
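The last part of initializeOp builds outputObjInspector as a standard struct over the key inspectors followed by the aggregation output inspectors. The standalone sketch below shows that construction pattern in isolation; the field names, the writable inspectors, and the class name GroupByOutputOISketch are illustrative assumptions, not values taken from GroupByOperator.

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class GroupByOutputOISketch {
  public static void main(String[] args) {
    // One group-by key column and one aggregation result column, both writable-backed.
    StructObjectInspector outputOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("_col0", "_col1"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.writableStringObjectInspector,
            PrimitiveObjectInspectorFactory.writableLongObjectInspector));
    // Rows described by a standard struct inspector can be plain Object[] values.
    Object[] row = new Object[] { new Text("a"), new LongWritable(3L) };
    System.out.println(outputOI.getStructFieldsDataAsList(row)); // [a, 3]
  }
}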
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
From the class JoinOperator, the method process:
@Override
public void process(Object row, int tag) throws HiveException {
  try {
    reportProgress();
    lastAlias = alias;
    alias = (byte) tag;
    if (!alias.equals(lastAlias)) {
      nextSz = joinEmitInterval;
    }
    List<Object> nr = getFilteredValue(alias, row);
    if (handleSkewJoin) {
      skewJoinKeyContext.handleSkew(tag);
    }
    // number of rows for the key in the given table
    long sz = storage[alias].rowCount();
    StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag];
    StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
    List keyObject = (List) soi.getStructFieldData(row, sf);
    // Are we consuming too much memory?
    if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0) &&
        !hasLeftSemiJoin) {
      if (sz == joinEmitInterval && !hasFilter(condn[alias - 1].getLeft()) &&
          !hasFilter(condn[alias - 1].getRight())) {
        // The input is sorted by alias, so if we are already in the last join
        // operand, we can emit some results now.
        // Note this has to be done before adding the current row to the storage,
        // to preserve the correctness for outer joins.
        checkAndGenObject();
        storage[alias].clearRows();
      }
    } else {
      if (isLogInfoEnabled && (sz == nextSz)) {
        // Print a message if we reached at least 1000 rows for a join operand.
        // We won't print a message for the last join operand since its size
        // will never reach joinEmitInterval.
        LOG.info("table " + alias + " has " + sz + " rows for join key " + keyObject);
        nextSz = getNextSize(nextSz);
      }
    }
    // Add the value to the vector.
    // If the join key is null, process each row in a different group.
    StructObjectInspector inspector = (StructObjectInspector) sf.getFieldObjectInspector();
    if (SerDeUtils.hasAnyNullObject(keyObject, inspector, nullsafes)) {
      endGroup();
      startGroup();
    }
    storage[alias].addRow(nr);
  } catch (Exception e) {
    e.printStackTrace();
    throw new HiveException(e);
  }
}
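The key extraction above, getStructFieldRef on the reduce-side row inspector followed by getStructFieldData, is the core StructObjectInspector lookup pattern. Here is a toy, self-contained version of that two-step lookup; the field names, string types, and row values are illustrative, not JoinOperator's actual schema.

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ReduceRowKeyLookupSketch {
  public static void main(String[] args) {
    // A toy reduce-side row inspector: { key:string, value:string }.
    StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("key", "value"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    Object[] row = new Object[] { "join-key", "payload" };
    // Same two-step lookup as JoinOperator.process: resolve the field ref, then read its data.
    StructField keyRef = rowOI.getStructFieldRef("key");
    System.out.println(rowOI.getStructFieldData(row, keyRef)); // join-key
  }
}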
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
From the class JoinUtil, the method unflattenObjInspector:
/**
 * Checks the input object inspector to see if it is in the form of a flattened struct
 * like the ones generated by a vectorized reduce sink input:
 *   { 'key.reducesinkkey0':int, 'value._col0':int, 'value._col1':int, .. }
 * If so, then it creates an "unflattened" struct that contains nested key/value
 * structs:
 *   { key: { reducesinkkey0:int }, value: { _col0:int, _col1:int, .. } }
 *
 * @param oi the input object inspector
 * @return unflattened object inspector if unflattening is needed,
 *         otherwise the original object inspector
 */
private static ObjectInspector unflattenObjInspector(ObjectInspector oi) {
  if (oi instanceof StructObjectInspector) {
    // Check if all fields start with "key." or "value."
    // If so, then unflatten by adding an additional level of nested key and value structs
    // Example: { "key.reducesinkkey0":int, "key.reducesinkkey1": int, "value._col6":int }
    // Becomes
    // { "key": { "reducesinkkey0":int, "reducesinkkey1":int }, "value": { "_col6":int } }
    ArrayList<StructField> keyFields = new ArrayList<StructField>();
    ArrayList<StructField> valueFields = new ArrayList<StructField>();
    for (StructField field : ((StructObjectInspector) oi).getAllStructFieldRefs()) {
      String fieldNameLower = field.getFieldName().toLowerCase();
      if (fieldNameLower.startsWith(KEY_FIELD_PREFIX)) {
        keyFields.add(field);
      } else if (fieldNameLower.startsWith(VALUE_FIELD_PREFIX)) {
        valueFields.add(field);
      } else {
        // Not a flattened struct, no need to unflatten
        return oi;
      }
    }
    // All field names are of the form "key." or "value."
    // Create key/value structs and add the respective fields to each one
    ArrayList<ObjectInspector> reduceFieldOIs = new ArrayList<ObjectInspector>();
    reduceFieldOIs.add(createStructFromFields(keyFields, Utilities.ReduceField.KEY.toString()));
    reduceFieldOIs.add(createStructFromFields(valueFields, Utilities.ReduceField.VALUE.toString()));
    // Finally create the outer struct to contain the key, value structs
    return ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, reduceFieldOIs);
  }
  return oi;
}
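For illustration, the nested shape described in the javadoc can be built by hand from standard struct inspectors. The sketch below constructs { key:{reducesinkkey0:int}, value:{_col0:int} } and performs a two-level field lookup against it; it does not call the private createStructFromFields helper, and the class name UnflattenedShapeSketch is hypothetical.

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class UnflattenedShapeSketch {
  public static void main(String[] args) {
    ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    // Nested key and value structs, mirroring the "unflattened" form in the javadoc above.
    StructObjectInspector keyOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("reducesinkkey0"), Arrays.asList(intOI));
    StructObjectInspector valueOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("_col0"), Arrays.asList(intOI));
    // Outer struct containing the two nested structs.
    StructObjectInspector outerOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("key", "value"), Arrays.<ObjectInspector>asList(keyOI, valueOI));
    // Two-level lookup: first the nested "key" struct, then its single field.
    StructField keyField = outerOI.getStructFieldRef("key");
    StructObjectInspector nestedKeyOI = (StructObjectInspector) keyField.getFieldObjectInspector();
    System.out.println(nestedKeyOI.getAllStructFieldRefs().get(0).getFieldName()); // reducesinkkey0
  }
}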
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
From the class LateralViewJoinOperator, the method initializeOp:
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
  ArrayList<String> fieldNames = conf.getOutputInternalColNames();
  // The output of the lateral view join will be the columns from the select
  // parent, followed by the columns from the UDTF parent
  StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[SELECT_TAG];
  List<? extends StructField> sfs = soi.getAllStructFieldRefs();
  for (StructField sf : sfs) {
    ois.add(sf.getFieldObjectInspector());
  }
  soi = (StructObjectInspector) inputObjInspectors[UDTF_TAG];
  sfs = soi.getAllStructFieldRefs();
  for (StructField sf : sfs) {
    ois.add(sf.getFieldObjectInspector());
  }
  outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, ois);
}