use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
the class DynamicPartitionPruningOptimization method generateSemiJoinOperatorPlan.
// Generates plan for min/max when dynamic partition pruning is ruled out.
private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContext parseContext, TableScanOperator ts, String keyBaseAlias, String internalColName, String colName, SemiJoinHint sjHint) throws SemanticException {
// we will put a fork in the plan at the source of the reduce sink
Operator<? extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
// we need the expr that generated the key of the reduce sink
ExprNodeDesc key = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex());
assert colName != null;
// Fetch the TableScan Operator.
Operator<?> op = parentOfRS;
while (!(op == null || op instanceof TableScanOperator || op instanceof ReduceSinkOperator)) {
op = op.getParentOperators().get(0);
}
Preconditions.checkNotNull(op);
if (op instanceof TableScanOperator) {
Table table = ((TableScanOperator) op).getConf().getTableMetadata();
if (table.isPartitionKey(colName)) {
// The column is partition column, skip the optimization.
return false;
}
}
// Check if there already exists a semijoin branch
GroupByOperator gb = parseContext.getColExprToGBMap().get(key);
if (gb != null) {
// Already an existing semijoin branch, reuse it
createFinalRsForSemiJoinOp(parseContext, ts, gb, key, keyBaseAlias, ctx.parent.getChildren().get(0), sjHint != null);
// done!
return true;
}
List<ExprNodeDesc> keyExprs = new ArrayList<ExprNodeDesc>();
keyExprs.add(key);
// group by requires "ArrayList", don't ask.
ArrayList<String> outputNames = new ArrayList<String>();
outputNames.add(HiveConf.getColumnInternalName(0));
// project the relevant key column
SelectDesc select = new SelectDesc(keyExprs, outputNames);
// Create the new RowSchema for the projected column
ColumnInfo columnInfo = parentOfRS.getSchema().getColumnInfo(internalColName);
ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>();
signature.add(columnInfo);
RowSchema rowSchema = new RowSchema(signature);
// Create the column expr map
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
ExprNodeDesc exprNode = null;
if (parentOfRS.getColumnExprMap() != null) {
exprNode = parentOfRS.getColumnExprMap().get(internalColName).clone();
} else {
exprNode = new ExprNodeColumnDesc(columnInfo);
}
if (exprNode instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc encd = (ExprNodeColumnDesc) exprNode;
encd.setColumn(internalColName);
}
colExprMap.put(internalColName, exprNode);
// Create the Select Operator
SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(select, rowSchema, colExprMap, parentOfRS);
// do a group by to aggregate min,max and bloom filter.
float groupByMemoryUsage = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
// Add min/max and bloom filter aggregations
List<ObjectInspector> aggFnOIs = new ArrayList<ObjectInspector>();
aggFnOIs.add(key.getWritableObjectInspector());
ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
params.add(new ExprNodeColumnDesc(key.getTypeInfo(), outputNames.get(0), "", false));
ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
try {
AggregationDesc min = new AggregationDesc("min", FunctionRegistry.getGenericUDAFEvaluator("min", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
AggregationDesc max = new AggregationDesc("max", FunctionRegistry.getGenericUDAFEvaluator("max", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
AggregationDesc bloomFilter = new AggregationDesc("bloom_filter", FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
bloomFilterEval.setSourceOperator(selectOp);
if (sjHint != null && sjHint.getNumEntries() > 0) {
LOG.debug("Setting size for " + keyBaseAlias + " to " + sjHint.getNumEntries() + " based on the hint");
bloomFilterEval.setHintEntries(sjHint.getNumEntries());
}
bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES));
bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR));
bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
aggs.add(min);
aggs.add(max);
aggs.add(bloomFilter);
} catch (SemanticException e) {
LOG.error("Error creating min/max aggregations on key", e);
throw new IllegalStateException("Error creating min/max aggregations on key", e);
}
// Create the Group by Operator
ArrayList<String> gbOutputNames = new ArrayList<String>();
gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(0));
gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(1));
gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(2));
GroupByDesc groupBy = new GroupByDesc(GroupByDesc.Mode.HASH, gbOutputNames, new ArrayList<ExprNodeDesc>(), aggs, false, groupByMemoryUsage, memoryThreshold, null, false, -1, false);
ArrayList<ColumnInfo> groupbyColInfos = new ArrayList<ColumnInfo>();
groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(0), key.getTypeInfo(), "", false));
groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(1), key.getTypeInfo(), "", false));
groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(2), key.getTypeInfo(), "", false));
GroupByOperator groupByOp = (GroupByOperator) OperatorFactory.getAndMakeChild(groupBy, new RowSchema(groupbyColInfos), selectOp);
groupByOp.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
// Get the column names of the aggregations for reduce sink
int colPos = 0;
ArrayList<ExprNodeDesc> rsValueCols = new ArrayList<ExprNodeDesc>();
for (int i = 0; i < aggs.size() - 1; i++) {
ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(key.getTypeInfo(), gbOutputNames.get(colPos++), "", false);
rsValueCols.add(colExpr);
}
// Bloom Filter uses binary
ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(TypeInfoFactory.binaryTypeInfo, gbOutputNames.get(colPos++), "", false);
rsValueCols.add(colExpr);
// Create the reduce sink operator
ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(new ArrayList<ExprNodeDesc>(), rsValueCols, gbOutputNames, false, -1, 0, 1, Operation.NOT_ACID);
ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(groupByOp.getSchema()), groupByOp);
Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
rsOp.setColumnExprMap(columnExprMap);
rsOp.getConf().setReducerTraits(EnumSet.of(ReduceSinkDesc.ReducerTraits.QUICKSTART));
// Create the final Group By Operator
ArrayList<AggregationDesc> aggsFinal = new ArrayList<AggregationDesc>();
try {
List<ObjectInspector> minFinalFnOIs = new ArrayList<ObjectInspector>();
List<ObjectInspector> maxFinalFnOIs = new ArrayList<ObjectInspector>();
List<ObjectInspector> bloomFilterFinalFnOIs = new ArrayList<ObjectInspector>();
ArrayList<ExprNodeDesc> minFinalParams = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> maxFinalParams = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> bloomFilterFinalParams = new ArrayList<ExprNodeDesc>();
// Use the expressions from Reduce Sink.
minFinalFnOIs.add(rsValueCols.get(0).getWritableObjectInspector());
maxFinalFnOIs.add(rsValueCols.get(1).getWritableObjectInspector());
bloomFilterFinalFnOIs.add(rsValueCols.get(2).getWritableObjectInspector());
// Coming from a ReduceSink the aggregations would be in the form VALUE._col0, VALUE._col1
minFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(0).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(0), "", false));
maxFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(1).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(1), "", false));
bloomFilterFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(2).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(2), "", false));
AggregationDesc min = new AggregationDesc("min", FunctionRegistry.getGenericUDAFEvaluator("min", minFinalFnOIs, false, false), minFinalParams, false, Mode.FINAL);
AggregationDesc max = new AggregationDesc("max", FunctionRegistry.getGenericUDAFEvaluator("max", maxFinalFnOIs, false, false), maxFinalParams, false, Mode.FINAL);
AggregationDesc bloomFilter = new AggregationDesc("bloom_filter", FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", bloomFilterFinalFnOIs, false, false), bloomFilterFinalParams, false, Mode.FINAL);
GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
bloomFilterEval.setSourceOperator(selectOp);
if (sjHint != null && sjHint.getNumEntries() > 0) {
bloomFilterEval.setHintEntries(sjHint.getNumEntries());
}
bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES));
bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR));
bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
aggsFinal.add(min);
aggsFinal.add(max);
aggsFinal.add(bloomFilter);
} catch (SemanticException e) {
LOG.error("Error creating min/max aggregations on key", e);
throw new IllegalStateException("Error creating min/max aggregations on key", e);
}
GroupByDesc groupByDescFinal = new GroupByDesc(GroupByDesc.Mode.FINAL, gbOutputNames, new ArrayList<ExprNodeDesc>(), aggsFinal, false, groupByMemoryUsage, memoryThreshold, null, false, 0, false);
GroupByOperator groupByOpFinal = (GroupByOperator) OperatorFactory.getAndMakeChild(groupByDescFinal, new RowSchema(rsOp.getSchema()), rsOp);
groupByOpFinal.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
createFinalRsForSemiJoinOp(parseContext, ts, groupByOpFinal, key, keyBaseAlias, ctx.parent.getChildren().get(0), sjHint != null);
return true;
}
use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
the class TestVectorGroupByOperator method buildAggregationDesc.
private static AggregationDesc buildAggregationDesc(VectorizationContext ctx, String aggregate, GenericUDAFEvaluator.Mode mode, String column, TypeInfo typeInfo) {
ExprNodeDesc inputColumn = buildColumnDesc(ctx, column, typeInfo);
ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
params.add(inputColumn);
AggregationDesc agg = new AggregationDesc();
agg.setGenericUDAFName(aggregate);
agg.setMode(mode);
agg.setParameters(params);
TypeInfo[] typeInfos = new TypeInfo[] { typeInfo };
final GenericUDAFEvaluator evaluator;
PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
try {
switch(aggregate) {
case "count":
evaluator = new GenericUDAFCount.GenericUDAFCountEvaluator();
break;
case "min":
evaluator = new GenericUDAFMin.GenericUDAFMinEvaluator();
break;
case "max":
evaluator = new GenericUDAFMax.GenericUDAFMaxEvaluator();
break;
case "sum":
evaluator = (new GenericUDAFSum()).getEvaluator(typeInfos);
break;
case "avg":
evaluator = (new GenericUDAFAverage()).getEvaluator(typeInfos);
break;
case "variance":
case "var":
case "var_pop":
evaluator = new GenericUDAFVariance.GenericUDAFVarianceEvaluator();
break;
case "var_samp":
evaluator = new GenericUDAFVarianceSample.GenericUDAFVarianceSampleEvaluator();
break;
case "std":
case "stddev":
case "stddev_pop":
evaluator = new GenericUDAFStd.GenericUDAFStdEvaluator();
break;
case "stddev_samp":
evaluator = new GenericUDAFStdSample.GenericUDAFStdSampleEvaluator();
break;
default:
throw new RuntimeException("Unexpected aggregate " + aggregate);
}
} catch (SemanticException e) {
throw new RuntimeException(e);
}
agg.setGenericUDAFEvaluator(evaluator);
return agg;
}
use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
the class ExportTask method execute.
@Override
protected int execute(DriverContext driverContext) {
try {
// Also creates the root directory
TableExport.Paths exportPaths = new TableExport.Paths(work.getAstRepresentationForErrorMsg(), work.getExportRootDir(), conf, false);
Hive db = getHive();
LOG.debug("Exporting data to: {}", exportPaths.getExportRootDir());
TableExport tableExport = new TableExport(exportPaths, work.getTableSpec(), work.getReplicationSpec(), db, null, conf);
if (!tableExport.write()) {
throw new SemanticException(ErrorMsg.EXIM_FOR_NON_NATIVE.getMsg());
}
} catch (Exception e) {
LOG.error("failed", e);
setException(e);
return 1;
}
return 0;
}
use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
the class VectorizationContext method isCustomUDF.
private static boolean isCustomUDF(String udfName) {
if (udfName == null) {
return false;
}
FunctionInfo funcInfo;
try {
funcInfo = FunctionRegistry.getFunctionInfo(udfName);
} catch (SemanticException e) {
LOG.warn("Failed to load " + udfName, e);
funcInfo = null;
}
if (funcInfo == null) {
return false;
}
boolean isNativeFunc = funcInfo.isNative();
return !isNativeFunc;
}
use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
the class LoadFunction method tasks.
public TaskTracker tasks() throws IOException, SemanticException {
URI fromURI = EximUtil.getValidatedURI(context.hiveConf, stripQuotes(event.rootDir().toUri().toString()));
Path fromPath = new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath());
try {
CreateFunctionHandler handler = new CreateFunctionHandler();
List<Task<? extends Serializable>> tasks = handler.handle(new MessageHandler.Context(dbNameToLoadIn, null, fromPath.toString(), null, null, context.hiveConf, context.hiveDb, null, LOG));
createFunctionReplLogTask(tasks, handler.getFunctionName());
tasks.forEach(tracker::addTask);
return tracker;
} catch (Exception e) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
}
}
Aggregations