Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
Class FileSinkOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  try {
    this.hconf = hconf;
    filesCreated = false;
    isNativeTable = !conf.getTableInfo().isNonNative();
    isTemporary = conf.isTemporary();
    multiFileSpray = conf.isMultiFileSpray();
    totalFiles = conf.getTotalFiles();
    numFiles = conf.getNumFiles();
    dpCtx = conf.getDynPartCtx();
    lbCtx = conf.getLbCtx();
    fsp = prevFsp = null;
    valToPaths = new HashMap<String, FSPaths>();
    taskId = Utilities.getTaskId(hconf);
    initializeSpecPath();
    fs = specPath.getFileSystem(hconf);
    try {
      createHiveOutputFormat(hconf);
    } catch (HiveException ex) {
      logOutputFormatError(hconf, ex);
      throw ex;
    }
    isCompressed = conf.getCompressed();
    parent = Utilities.toTempPath(conf.getDirName());
    statsFromRecordWriter = new boolean[numFiles];
    serializer = (Serializer) conf.getTableInfo().getDeserializerClass().newInstance();
    serializer.initialize(unsetNestedColumnPaths(hconf), conf.getTableInfo().getProperties());
    outputClass = serializer.getSerializedClass();
    if (isLogInfoEnabled) {
      LOG.info("Using serializer : " + serializer + " and formatter : " + hiveOutputFormat + (isCompressed ? " with compression" : ""));
    }
    // Timeout is chosen to make sure that even if one iteration takes more than
    // half of the script.timeout but less than script.timeout, we will still
    // be able to report progress.
    timeOut = hconf.getInt("mapred.healthChecker.script.timeout", 600000) / 2;
    if (hconf instanceof JobConf) {
      jc = (JobConf) hconf;
    } else {
      // test code path
      jc = new JobConf(hconf);
    }
    if (multiFileSpray) {
      partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
      int i = 0;
      for (ExprNodeDesc e : conf.getPartitionCols()) {
        partitionEval[i++] = ExprNodeEvaluatorFactory.get(e);
      }
      partitionObjectInspectors = initEvaluators(partitionEval, outputObjInspector);
      prtner = (HivePartitioner<HiveKey, Object>) ReflectionUtils.newInstance(jc.getPartitionerClass(), null);
    }
    if (dpCtx != null) {
      dpSetup();
    }
    if (lbCtx != null) {
      lbSetup();
    }
    if (!bDynParts) {
      fsp = new FSPaths(specPath);
      // createBucketFiles(fsp);
      if (!this.isSkewedStoredAsSubDirectories) {
        // special entry for non-DP case
        valToPaths.put("", fsp);
      }
    }
    final StoragePolicyValue tmpStorage = StoragePolicyValue.lookup(HiveConf.getVar(hconf, HIVE_TEMPORARY_TABLE_STORAGE));
    if (isTemporary && fsp != null && tmpStorage != StoragePolicyValue.DEFAULT) {
      final Path outputPath = fsp.taskOutputTempPath;
      StoragePolicyShim shim = ShimLoader.getHadoopShims().getStoragePolicyShim(fs);
      if (shim != null) {
        // directory creation is otherwise within the writers
        fs.mkdirs(outputPath);
        shim.setStoragePolicy(outputPath, tmpStorage);
      }
    }
    if (conf.getWriteType() == AcidUtils.Operation.UPDATE || conf.getWriteType() == AcidUtils.Operation.DELETE) {
      // ROW__ID is always in the first field
      recIdField = ((StructObjectInspector) outputObjInspector).getAllStructFieldRefs().get(0);
      recIdInspector = (StructObjectInspector) recIdField.getFieldObjectInspector();
      // bucket is the second field in the record id
      bucketField = recIdInspector.getAllStructFieldRefs().get(1);
      bucketInspector = (IntObjectInspector) bucketField.getFieldObjectInspector();
    }
    numRows = 0;
    cntr = 1;
    logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
    statsMap.put(getCounterName(Counter.RECORDS_OUT), row_count);
  } catch (HiveException e) {
    throw e;
  } catch (Exception e) {
    e.printStackTrace();
    throw new HiveException(e);
  }
}
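The multiFileSpray branch above shows the core ExprNodeDesc usage pattern: each partition column is an ExprNodeDesc that is turned into an ExprNodeEvaluator and initialized against the operator's output ObjectInspector. The following is a minimal, self-contained sketch of that pattern, not part of FileSinkOperator; the class name PartitionEvalSketch, the row schema, and the column names are made up for illustration.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartitionEvalSketch {
  public static void main(String[] args) throws HiveException {
    // Hypothetical row schema: (deptid int, name string).
    StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("deptid", "name"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));

    // One ExprNodeDesc per "partition column", standing in for conf.getPartitionCols().
    List<ExprNodeDesc> partitionCols = Arrays.<ExprNodeDesc>asList(
        new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "deptid", null, false));

    ExprNodeEvaluator[] partitionEval = new ExprNodeEvaluator[partitionCols.size()];
    ObjectInspector[] partitionOIs = new ObjectInspector[partitionCols.size()];
    int i = 0;
    for (ExprNodeDesc e : partitionCols) {
      partitionEval[i] = ExprNodeEvaluatorFactory.get(e);
      // initEvaluators() in Operator does this initialize call for each evaluator.
      partitionOIs[i] = partitionEval[i].initialize(rowOI);
      i++;
    }

    // Evaluate one row; in FileSinkOperator the result feeds the HivePartitioner
    // that picks which spray file the row goes to.
    Object row = Arrays.<Object>asList(7, "alice");
    System.out.println(partitionEval[0].evaluate(row)); // prints 7
  }
}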
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
Class GroupByOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  numRowsInput = 0;
  numRowsHashTbl = 0;
  heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT);
  countAfterReport = 0;
  groupingSetsPresent = conf.isGroupingSetsPresent();
  ObjectInspector rowInspector = inputObjInspectors[0];
  // init keyFields
  int numKeys = conf.getKeys().size();
  keyFields = new ExprNodeEvaluator[numKeys];
  keyObjectInspectors = new ObjectInspector[numKeys];
  currentKeyObjectInspectors = new ObjectInspector[numKeys];
  for (int i = 0; i < numKeys; i++) {
    keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i), hconf);
    keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
    currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(keyObjectInspectors[i], ObjectInspectorCopyOption.WRITABLE);
  }
  // Initialize the grouping-set constants once so they can be reused for each row.
  if (groupingSetsPresent) {
    groupingSets = conf.getListGroupingSets();
    groupingSetsPosition = conf.getGroupingSetPosition();
    newKeysGroupingSets = new IntWritable[groupingSets.size()];
    groupingSetsBitSet = new FastBitSet[groupingSets.size()];
    int pos = 0;
    for (Integer groupingSet : groupingSets) {
      // Create the mapping corresponding to the grouping set
      newKeysGroupingSets[pos] = new IntWritable(groupingSet);
      groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet, groupingSetsPosition);
      pos++;
    }
  }
  // initialize unionExpr for reduce-side
  // reduce KEY has union field as the last field if there are distinct
  // aggregates in group-by.
  List<? extends StructField> sfs = ((StructObjectInspector) rowInspector).getAllStructFieldRefs();
  if (sfs.size() > 0) {
    StructField keyField = sfs.get(0);
    if (keyField.getFieldName().toUpperCase().equals(Utilities.ReduceField.KEY.name())) {
      ObjectInspector keyObjInspector = keyField.getFieldObjectInspector();
      if (keyObjInspector instanceof StructObjectInspector) {
        List<? extends StructField> keysfs = ((StructObjectInspector) keyObjInspector).getAllStructFieldRefs();
        if (keysfs.size() > 0) {
          // the last field is the union field, if any
          StructField sf = keysfs.get(keysfs.size() - 1);
          if (sf.getFieldObjectInspector().getCategory().equals(ObjectInspector.Category.UNION)) {
            unionExprEval = ExprNodeEvaluatorFactory.get(new ExprNodeColumnDesc(TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), keyField.getFieldName() + "." + sf.getFieldName(), null, false), hconf);
            unionExprEval.initialize(rowInspector);
          }
        }
      }
    }
  }
  // init aggregationParameterFields
  ArrayList<AggregationDesc> aggrs = conf.getAggregators();
  aggregationParameterFields = new ExprNodeEvaluator[aggrs.size()][];
  aggregationParameterObjectInspectors = new ObjectInspector[aggrs.size()][];
  aggregationParameterStandardObjectInspectors = new ObjectInspector[aggrs.size()][];
  aggregationParameterObjects = new Object[aggrs.size()][];
  aggregationIsDistinct = new boolean[aggrs.size()];
  for (int i = 0; i < aggrs.size(); i++) {
    AggregationDesc aggr = aggrs.get(i);
    ArrayList<ExprNodeDesc> parameters = aggr.getParameters();
    aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()];
    aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters.size()];
    aggregationParameterStandardObjectInspectors[i] = new ObjectInspector[parameters.size()];
    aggregationParameterObjects[i] = new Object[parameters.size()];
    for (int j = 0; j < parameters.size(); j++) {
      aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory.get(parameters.get(j), hconf);
      aggregationParameterObjectInspectors[i][j] = aggregationParameterFields[i][j].initialize(rowInspector);
      if (unionExprEval != null) {
        String[] names = parameters.get(j).getExprString().split("\\.");
        // parameters of the form : KEY.colx:t.coly
        if (Utilities.ReduceField.KEY.name().equals(names[0]) && names.length > 2) {
          String name = names[names.length - 2];
          int tag = Integer.parseInt(name.split("\\:")[1]);
          if (aggr.getDistinct()) {
            // is distinct
            Set<Integer> set = distinctKeyAggrs.get(tag);
            if (null == set) {
              set = new HashSet<Integer>();
              distinctKeyAggrs.put(tag, set);
            }
            if (!set.contains(i)) {
              set.add(i);
            }
          } else {
            Set<Integer> set = nonDistinctKeyAggrs.get(tag);
            if (null == set) {
              set = new HashSet<Integer>();
              nonDistinctKeyAggrs.put(tag, set);
            }
            if (!set.contains(i)) {
              set.add(i);
            }
          }
        } else {
          // will be KEY._COLx or VALUE._COLx
          if (!nonDistinctAggrs.contains(i)) {
            nonDistinctAggrs.add(i);
          }
        }
      } else {
        if (aggr.getDistinct()) {
          aggregationIsDistinct[i] = true;
        }
      }
      aggregationParameterStandardObjectInspectors[i][j] = ObjectInspectorUtils.getStandardObjectInspector(aggregationParameterObjectInspectors[i][j], ObjectInspectorCopyOption.WRITABLE);
      aggregationParameterObjects[i][j] = null;
    }
    if (parameters.size() == 0) {
      // for ex: count(*)
      if (!nonDistinctAggrs.contains(i)) {
        nonDistinctAggrs.add(i);
      }
    }
  }
  // init aggregationClasses
  aggregationEvaluators = new GenericUDAFEvaluator[conf.getAggregators().size()];
  for (int i = 0; i < aggregationEvaluators.length; i++) {
    AggregationDesc agg = conf.getAggregators().get(i);
    aggregationEvaluators[i] = agg.getGenericUDAFEvaluator();
  }
  MapredContext context = MapredContext.get();
  if (context != null) {
    for (GenericUDAFEvaluator genericUDAFEvaluator : aggregationEvaluators) {
      context.setup(genericUDAFEvaluator);
    }
  }
  // grouping id should be pruned, which is the last of key columns
  // see ColumnPrunerGroupByProc
  outputKeyLength = conf.pruneGroupingSetId() ? keyFields.length - 1 : keyFields.length;
  // init objectInspectors
  ObjectInspector[] objectInspectors = new ObjectInspector[outputKeyLength + aggregationEvaluators.length];
  for (int i = 0; i < outputKeyLength; i++) {
    objectInspectors[i] = currentKeyObjectInspectors[i];
  }
  for (int i = 0; i < aggregationEvaluators.length; i++) {
    objectInspectors[outputKeyLength + i] = aggregationEvaluators[i].init(conf.getAggregators().get(i).getMode(), aggregationParameterObjectInspectors[i]);
  }
  aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][];
  if ((conf.getMode() != GroupByDesc.Mode.HASH || conf.getBucketGroup()) && (!groupingSetsPresent)) {
    aggregations = newAggregations();
    hashAggr = false;
  } else {
    hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>(256);
    aggregations = newAggregations();
    hashAggr = true;
    keyPositionsSize = new ArrayList<Integer>();
    aggrPositions = new List[aggregations.length];
    groupbyMapAggrInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
    // compare every groupbyMapAggrInterval rows
    numRowsCompareHashAggr = groupbyMapAggrInterval;
    minReductionHashAggr = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
  }
  List<String> fieldNames = new ArrayList<String>(conf.getOutputColumnNames());
  outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, Arrays.asList(objectInspectors));
  KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors);
  newKeys = keyWrapperFactory.getKeyWrapper();
  isTez = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
  isLlap = isTez && HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_MODE).equals("llap");
  numExecutors = isLlap ? HiveConf.getIntVar(hconf, HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS) : 1;
  firstRow = true;
  // Estimate the maximum number of hash table entries; since the size of an entry
  // is not known, estimate that based on the number of entries.
  if (hashAggr) {
    computeMaxEntriesHashAggr();
  }
  memoryMXBean = ManagementFactory.getMemoryMXBean();
  maxMemory = isTez ? getConf().getMaxMemoryAvailable() : memoryMXBean.getHeapMemoryUsage().getMax();
  memoryThreshold = this.getConf().getMemoryThreshold();
  LOG.info("isTez: {} isLlap: {} numExecutors: {} maxMemory: {}", isTez, isLlap, numExecutors, maxMemory);
}
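The keyFields loop at the top of this method evaluates each group-by key ExprNodeDesc against the incoming row and keeps a WRITABLE-standardized copy of the key (currentKeyObjectInspectors). Below is a minimal sketch of that idiom, assuming a made-up class name GroupByKeySketch and a hypothetical (city string, amount int) row schema; it is an illustration of the pattern, not GroupByOperator code.

import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class GroupByKeySketch {
  public static void main(String[] args) throws HiveException {
    StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("city", "amount"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector));

    // The group-by key "city" as an ExprNodeDesc, like conf.getKeys().get(i).
    ExprNodeEvaluator keyEval = ExprNodeEvaluatorFactory.get(
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "city", null, false));
    ObjectInspector keyOI = keyEval.initialize(rowOI);

    // Standard WRITABLE inspector for the copy kept across rows (currentKeyObjectInspectors[i]).
    ObjectInspector currentKeyOI =
        ObjectInspectorUtils.getStandardObjectInspector(keyOI, ObjectInspectorCopyOption.WRITABLE);

    Object row = Arrays.<Object>asList("paris", 42);
    Object keyValue = keyEval.evaluate(row);
    // Detach the key from the reused row buffer before storing it, e.g. in a hash table.
    Object storedKey = ObjectInspectorUtils.copyToStandardObject(keyValue, keyOI, ObjectInspectorCopyOption.WRITABLE);
    System.out.println(storedKey + " (" + currentKeyOI.getTypeName() + ")"); // paris (string)
  }
}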
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
Class GroupByOperator, method genColLists.
// Group by contains the columns needed - no need to aggregate from children
public List<String> genColLists(HashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx) {
  List<String> colLists = new ArrayList<String>();
  ArrayList<ExprNodeDesc> keys = conf.getKeys();
  for (ExprNodeDesc key : keys) {
    colLists = Utilities.mergeUniqElems(colLists, key.getCols());
  }
  ArrayList<AggregationDesc> aggrs = conf.getAggregators();
  for (AggregationDesc aggr : aggrs) {
    ArrayList<ExprNodeDesc> params = aggr.getParameters();
    for (ExprNodeDesc param : params) {
      colLists = Utilities.mergeUniqElems(colLists, param.getCols());
    }
  }
  return colLists;
}
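genColLists relies on ExprNodeDesc.getCols() to report which columns an expression reads, then merges the lists without duplicates. A short self-contained sketch of that idiom follows; the class name and columns are hypothetical, and a hand-rolled LinkedHashSet merge stands in for Utilities.mergeUniqElems.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReferencedColumnsSketch {
  public static void main(String[] args) {
    List<ExprNodeDesc> exprs = Arrays.<ExprNodeDesc>asList(
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "city", null, false),
        new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "amount", null, false),
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "city", null, false)); // duplicate

    LinkedHashSet<String> cols = new LinkedHashSet<String>();
    for (ExprNodeDesc e : exprs) {
      List<String> referenced = e.getCols(); // columns this expression reads; may be null
      if (referenced != null) {
        cols.addAll(referenced);
      }
    }
    System.out.println(new ArrayList<String>(cols)); // [city, amount]
  }
}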
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
Class SparkDynamicPartitionPruner, method initialize.
public void initialize(MapWork work, JobConf jobConf) throws SerDeException {
  Map<String, SourceInfo> columnMap = new HashMap<String, SourceInfo>();
  Set<String> sourceWorkIds = work.getEventSourceTableDescMap().keySet();
  for (String id : sourceWorkIds) {
    List<TableDesc> tables = work.getEventSourceTableDescMap().get(id);
    List<String> columnNames = work.getEventSourceColumnNameMap().get(id);
    List<ExprNodeDesc> partKeyExprs = work.getEventSourcePartKeyExprMap().get(id);
    Iterator<String> cit = columnNames.iterator();
    Iterator<ExprNodeDesc> pit = partKeyExprs.iterator();
    for (TableDesc t : tables) {
      String columnName = cit.next();
      ExprNodeDesc partKeyExpr = pit.next();
      SourceInfo si = new SourceInfo(t, partKeyExpr, columnName, jobConf);
      if (!sourceInfoMap.containsKey(id)) {
        sourceInfoMap.put(id, new ArrayList<SourceInfo>());
      }
      sourceInfoMap.get(id).add(si);
      // Multiple sources may restrict the same column; in that case take
      // the union of the values.
      if (columnMap.containsKey(columnName)) {
        si.values = columnMap.get(columnName).values;
      }
      columnMap.put(columnName, si);
    }
  }
}
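The columnMap trick above makes every SourceInfo that targets the same partition column share one value collection, so the pruner ends up filtering on the union of the values from all sources. Below is a simplified, hypothetical model of just that bookkeeping; SimpleSource and the column names are invented stand-ins for SparkDynamicPartitionPruner.SourceInfo, not the real class.

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class ColumnUnionSketch {
  static class SimpleSource {
    final String columnName;
    Set<Object> values = new HashSet<Object>(); // shared when two sources hit the same column
    SimpleSource(String columnName) {
      this.columnName = columnName;
    }
  }

  public static void main(String[] args) {
    Map<String, SimpleSource> columnMap = new HashMap<String, SimpleSource>();
    List<SimpleSource> sources = Arrays.asList(
        new SimpleSource("ds"), new SimpleSource("ds"), new SimpleSource("hr"));

    for (SimpleSource si : sources) {
      // Same move as in initialize(): reuse the value set already registered for
      // this column, so later additions from either source land in one union.
      if (columnMap.containsKey(si.columnName)) {
        si.values = columnMap.get(si.columnName).values;
      }
      columnMap.put(si.columnName, si);
    }

    sources.get(0).values.add("2024-01-01");
    sources.get(1).values.add("2024-01-02");
    System.out.println(columnMap.get("ds").values); // both dates: the union
  }
}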
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
Class VectorizationContext, method getCastToLongExpression.
private VectorExpression getCastToLongExpression(List<ExprNodeDesc> childExpr, PrimitiveCategory integerPrimitiveCategory) throws HiveException {
  ExprNodeDesc child = childExpr.get(0);
  String inputType = childExpr.get(0).getTypeString();
  if (child instanceof ExprNodeConstantDesc) {
    // Return a constant vector expression
    Object constantValue = ((ExprNodeConstantDesc) child).getValue();
    Long longValue = castConstantToLong(constantValue, child.getTypeInfo(), integerPrimitiveCategory);
    return getConstantVectorExpression(longValue, TypeInfoFactory.longTypeInfo, VectorExpressionDescriptor.Mode.PROJECTION);
  }
  // Other input types are handled through the descriptor-based lookup; the int family needs
  // special handling.
  if (isIntFamily(inputType)) {
    // integer and boolean types require no conversion, so use a no-op
    return getIdentityExpression(childExpr);
  }
  return null;
}
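The first branch above folds a constant child instead of building a runtime cast: when the child ExprNodeDesc is an ExprNodeConstantDesc, its value and type are read directly. The following is a brief, self-contained sketch of that check; the class name is hypothetical, and the plain Number-to-long conversion stands in for castConstantToLong, which is private to VectorizationContext.

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ConstantChildSketch {
  public static void main(String[] args) {
    ExprNodeDesc child = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 42);

    if (child instanceof ExprNodeConstantDesc) {
      Object constantValue = ((ExprNodeConstantDesc) child).getValue();
      // An int constant can be widened to long without a vectorized cast expression.
      Long longValue = ((Number) constantValue).longValue();
      System.out.println(child.getTypeString() + " constant folded to long " + longValue); // int constant folded to long 42
    }
  }
}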