Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator in project asterixdb by Apache.
The class LangExpressionToPlanTranslator, method translateLoad.
@Override
public ILogicalPlan translateLoad(ICompiledDmlStatement stmt) throws AlgebricksException {
    CompiledLoadFromFileStatement clffs = (CompiledLoadFromFileStatement) stmt;
    Dataset dataset = metadataProvider.findDataset(clffs.getDataverseName(), clffs.getDatasetName());
    if (dataset == null) {
        // This would never happen since we check for this in AqlTranslator.
        throw new AlgebricksException(
                "Unable to load dataset " + clffs.getDatasetName() + " since it does not exist");
    }
    IAType itemType = metadataProvider.findType(dataset.getItemTypeDataverseName(), dataset.getItemTypeName());
    IAType metaItemType =
            metadataProvider.findType(dataset.getMetaItemTypeDataverseName(), dataset.getMetaItemTypeName());
    DatasetDataSource targetDatasource =
            validateDatasetInfo(metadataProvider, stmt.getDataverseName(), stmt.getDatasetName());
    List<List<String>> partitionKeys = targetDatasource.getDataset().getPrimaryKeys();
    if (dataset.hasMetaPart()) {
        throw new AlgebricksException(
                dataset.getDatasetName() + ": load dataset is not supported on Datasets with Meta records");
    }
    LoadableDataSource lds;
    try {
        lds = new LoadableDataSource(dataset, itemType, metaItemType, clffs.getAdapter(), clffs.getProperties());
    } catch (IOException e) {
        throw new AlgebricksException(e);
    }
    // etsOp is a dummy input operator used to keep the compiler happy. It could be removed, but that would
    // require fixing many rewrite rules that assume that datasourcescan operators always have an input.
    ILogicalOperator etsOp = new EmptyTupleSourceOperator();
    // Add a logical variable for the record.
    List<LogicalVariable> payloadVars = new ArrayList<>();
    payloadVars.add(context.newVar());
    // Create a scan operator and make the empty tuple source its input.
    DataSourceScanOperator dssOp = new DataSourceScanOperator(payloadVars, lds);
    dssOp.getInputs().add(new MutableObject<>(etsOp));
    ILogicalExpression payloadExpr = new VariableReferenceExpression(payloadVars.get(0));
    Mutable<ILogicalExpression> payloadRef = new MutableObject<>(payloadExpr);
    // Create the assign that extracts the primary key out of the record.
    ArrayList<LogicalVariable> pkVars = new ArrayList<>();
    ArrayList<Mutable<ILogicalExpression>> pkExprs = new ArrayList<>();
    List<Mutable<ILogicalExpression>> varRefsForLoading = new ArrayList<>();
    LogicalVariable payloadVar = payloadVars.get(0);
    for (List<String> keyFieldName : partitionKeys) {
        PlanTranslationUtil.prepareVarAndExpression(keyFieldName, payloadVar, pkVars, pkExprs, varRefsForLoading,
                context);
    }
    AssignOperator assign = new AssignOperator(pkVars, pkExprs);
    assign.getInputs().add(new MutableObject<>(dssOp));
    // If the input is pre-sorted, set the ordering property explicitly on the assign.
    if (clffs.alreadySorted()) {
        List<OrderColumn> orderColumns = new ArrayList<>();
        for (int i = 0; i < pkVars.size(); ++i) {
            orderColumns.add(new OrderColumn(pkVars.get(i), OrderKind.ASC));
        }
        assign.setExplicitOrderingProperty(new LocalOrderProperty(orderColumns));
    }
    List<String> additionalFilteringField = DatasetUtil.getFilterField(targetDatasource.getDataset());
    List<LogicalVariable> additionalFilteringVars;
    List<Mutable<ILogicalExpression>> additionalFilteringAssignExpressions;
    List<Mutable<ILogicalExpression>> additionalFilteringExpressions = null;
    AssignOperator additionalFilteringAssign = null;
    if (additionalFilteringField != null) {
        additionalFilteringVars = new ArrayList<>();
        additionalFilteringAssignExpressions = new ArrayList<>();
        additionalFilteringExpressions = new ArrayList<>();
        PlanTranslationUtil.prepareVarAndExpression(additionalFilteringField, payloadVar, additionalFilteringVars,
                additionalFilteringAssignExpressions, additionalFilteringExpressions, context);
        additionalFilteringAssign =
                new AssignOperator(additionalFilteringVars, additionalFilteringAssignExpressions);
    }
    InsertDeleteUpsertOperator insertOp = new InsertDeleteUpsertOperator(targetDatasource, payloadRef,
            varRefsForLoading, InsertDeleteUpsertOperator.Kind.INSERT, true);
    insertOp.setAdditionalFilteringExpressions(additionalFilteringExpressions);
    if (additionalFilteringAssign != null) {
        additionalFilteringAssign.getInputs().add(new MutableObject<>(assign));
        insertOp.getInputs().add(new MutableObject<>(additionalFilteringAssign));
    } else {
        insertOp.getInputs().add(new MutableObject<>(assign));
    }
    ILogicalOperator leafOperator = new SinkOperator();
    leafOperator.getInputs().add(new MutableObject<>(insertOp));
    return new ALogicalPlanImpl(new MutableObject<>(leafOperator));
}
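The recurring idiom in this method is the EmptyTupleSourceOperator acting as a dummy leaf input for the data-source scan, as explained in the comment above. The fragment below is a condensed sketch of just that wiring; the variable names are illustrative, and dataSource and context are assumed to be supplied by the surrounding translator.

// Condensed sketch of the ETS-as-dummy-input wiring used above (illustrative names; dataSource
// and context are assumed to come from the surrounding translator).
ILogicalOperator ets = new EmptyTupleSourceOperator();        // leaf operator that emits a single empty tuple
List<LogicalVariable> scanVars = new ArrayList<>();
scanVars.add(context.newVar());                               // variable that will carry the scanned record
DataSourceScanOperator scan = new DataSourceScanOperator(scanVars, dataSource);
scan.getInputs().add(new MutableObject<>(ets));               // the scan always gets an input, even a trivial one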
Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator in project asterixdb by Apache.
The class ComplexUnnestToProductRule, method rewritePost.
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
    if (op.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN
            && op.getOperatorTag() != LogicalOperatorTag.UNNEST) {
        return false;
    }
    // Stop rewriting if the operator originates from a nested tuple source.
    if (insideSubplan(opRef)) {
        return false;
    }
    // We may pull selects above the join we create in order to eliminate possible dependencies between
    // the outer and inner input plans of the join.
    List<ILogicalOperator> topSelects = new ArrayList<ILogicalOperator>();
    // Keep track of the operators and used variables participating in the inner input plan.
    HashSet<LogicalVariable> innerUsedVars = new HashSet<LogicalVariable>();
    List<ILogicalOperator> innerOps = new ArrayList<ILogicalOperator>();
    HashSet<LogicalVariable> outerUsedVars = new HashSet<LogicalVariable>();
    List<ILogicalOperator> outerOps = new ArrayList<ILogicalOperator>();
    innerOps.add(op);
    VariableUtilities.getUsedVariables(op, innerUsedVars);
    Mutable<ILogicalOperator> opRef2 = op.getInputs().get(0);
    AbstractLogicalOperator op2 = (AbstractLogicalOperator) opRef2.getValue();
    // Find an unnest or join and partition the plan between the first unnest and that operator into
    // independent parts.
    if (!findPlanPartition(op2, innerUsedVars, outerUsedVars, innerOps, outerOps, topSelects, false)) {
        // We could not find an unnest or join.
        return false;
    }
    // The last operator must be an unnest or join.
    AbstractLogicalOperator unnestOrJoin = (AbstractLogicalOperator) outerOps.get(outerOps.size() - 1);
    ILogicalOperator outerRoot = null;
    ILogicalOperator innerRoot = null;
    EmptyTupleSourceOperator ets = new EmptyTupleSourceOperator();
    // If we found a join, simply use it as the outer root.
    if (unnestOrJoin.getOperatorTag() != LogicalOperatorTag.INNERJOIN
            && unnestOrJoin.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
        // We've found a second unnest. First, sanity check that the unnest does not output any live
        // variables that are used by the plan above (until the first unnest).
        List<LogicalVariable> liveVars = new ArrayList<>();
        VariableUtilities.getLiveVariables(unnestOrJoin, liveVars);
        for (LogicalVariable liveVar : liveVars) {
            if (innerUsedVars.contains(liveVar)) {
                return false;
            }
        }
        // Continue finding a partitioning of the plan such that the inner and outer partitions are
        // independent, in order to feed a join. Now, we look below the second unnest or join.
        VariableUtilities.getUsedVariables(unnestOrJoin, outerUsedVars);
        AbstractLogicalOperator unnestChild = (AbstractLogicalOperator) unnestOrJoin.getInputs().get(0).getValue();
        if (!findPlanPartition(unnestChild, innerUsedVars, outerUsedVars, innerOps, outerOps, topSelects, true)) {
            // We could not find a suitable partitioning.
            return false;
        }
    }
    innerRoot = buildOperatorChain(innerOps, ets, context);
    context.computeAndSetTypeEnvironmentForOperator(innerRoot);
    outerRoot = buildOperatorChain(outerOps, null, context);
    context.computeAndSetTypeEnvironmentForOperator(outerRoot);
    InnerJoinOperator product =
            new InnerJoinOperator(new MutableObject<ILogicalExpression>(ConstantExpression.TRUE));
    // Outer branch.
    product.getInputs().add(new MutableObject<ILogicalOperator>(outerRoot));
    // Inner branch.
    product.getInputs().add(new MutableObject<ILogicalOperator>(innerRoot));
    context.computeAndSetTypeEnvironmentForOperator(product);
    // Put the selects on top of the join.
    ILogicalOperator topOp = product;
    if (!topSelects.isEmpty()) {
        topOp = buildOperatorChain(topSelects, product, context);
    }
    // Plug the selects + product into the plan.
    opRef.setValue(topOp);
    context.computeAndSetTypeEnvironmentForOperator(topOp);
    return true;
}
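buildOperatorChain and findPlanPartition are private helpers of the rule and are not included in this excerpt. The sketch below shows one plausible, simplified shape for a chain-building helper, purely as an assumption to make the calls above easier to follow; the actual AsterixDB implementation may differ.

// Hypothetical, simplified chain-building helper (an assumption, not the rule's real code):
// wires the operators into a linear chain with the first element on top, attaches bottomInput
// (if given) under the last one, and refreshes type environments along the way.
private static ILogicalOperator chainOperators(List<ILogicalOperator> ops, ILogicalOperator bottomInput,
        IOptimizationContext context) throws AlgebricksException {
    for (int i = 0; i < ops.size(); i++) {
        ILogicalOperator current = ops.get(i);
        ILogicalOperator next = i + 1 < ops.size() ? ops.get(i + 1) : bottomInput;
        if (next != null) {
            current.getInputs().clear();
            current.getInputs().add(new MutableObject<>(next));
        }
        context.computeAndSetTypeEnvironmentForOperator(current);
    }
    return ops.get(0);
}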
Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator in project asterixdb by Apache.
The class LogicalOperatorDeepCopyWithNewVariablesVisitor, method visitEmptyTupleSourceOperator.
@Override
public ILogicalOperator visitEmptyTupleSourceOperator(EmptyTupleSourceOperator op, ILogicalOperator arg) {
    EmptyTupleSourceOperator opCopy = new EmptyTupleSourceOperator();
    opCopy.setExecutionMode(op.getExecutionMode());
    return opCopy;
}
Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator in project asterixdb by Apache.
The class SimpleUnnestToProductRule, method rewritePost.
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    ILogicalOperator op = opRef.getValue();
    if (!isScanOrUnnest(op)) {
        return false;
    }
    Mutable<ILogicalOperator> opRef2 = op.getInputs().get(0);
    ILogicalOperator op2 = opRef2.getValue();
    if (!isScanOrUnnest(op2) && !descOrSelfIsSourceScan(op2)) {
        return false;
    }
    // Make sure that op does not use any variables produced by op2.
    if (!opsAreIndependent(op, op2)) {
        return false;
    }
    /*
     * Find the boundary between the left branch and the right branch:
     * the operator pipeline on top of boundaryOpRef (exclusive) is the inner branch,
     * the operator pipeline under boundaryOpRef (inclusive) is the outer branch.
     */
    Mutable<ILogicalOperator> currentOpRef = opRef;
    Mutable<ILogicalOperator> boundaryOpRef = currentOpRef.getValue().getInputs().get(0);
    while (currentOpRef.getValue().getInputs().size() == 1) {
        currentOpRef = currentOpRef.getValue().getInputs().get(0);
    }
    Mutable<ILogicalOperator> tupleSourceOpRef = currentOpRef;
    currentOpRef = opRef;
    if (tupleSourceOpRef.getValue().getOperatorTag() == LogicalOperatorTag.NESTEDTUPLESOURCE) {
        while (currentOpRef.getValue().getInputs().size() == 1
                /*
                 * When this rule is fired, Unnests with a dataset function have been rewritten to
                 * DataSourceScans and AccessMethod-related rewriting hasn't been done. Therefore, we only
                 * need to check whether currentOpRef holds a DataSourceScanOperator.
                 */
                && currentOpRef.getValue().getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN
                && descOrSelfIsSourceScan(currentOpRef.getValue())) {
            if (opsAreIndependent(currentOpRef.getValue(), tupleSourceOpRef.getValue())) {
                // Move down the boundary if the operator is independent of the tuple source.
                boundaryOpRef = currentOpRef.getValue().getInputs().get(0);
            } else {
                break;
            }
            currentOpRef = currentOpRef.getValue().getInputs().get(0);
        }
    } else {
        // Move the boundary below any top const assigns.
        boundaryOpRef = opRef.getValue().getInputs().get(0);
        while (boundaryOpRef.getValue().getInputs().size() == 1
                /*
                 * When this rule is fired, Unnests with a dataset function have been rewritten to
                 * DataSourceScans and AccessMethod-related rewriting hasn't been done. Therefore, we only
                 * need to check whether boundaryOpRef holds a DataSourceScanOperator.
                 */
                && boundaryOpRef.getValue().getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN) {
            List<LogicalVariable> opUsedVars = new ArrayList<LogicalVariable>();
            VariableUtilities.getUsedVariables(boundaryOpRef.getValue(), opUsedVars);
            // We cannot freely move the location of operators tagged as un-movable.
            if (opUsedVars.size() == 0 && OperatorPropertiesUtil.isMovable(boundaryOpRef.getValue())) {
                // Move down the boundary if the operator is a const assign.
                boundaryOpRef = boundaryOpRef.getValue().getInputs().get(0);
            } else {
                break;
            }
        }
    }
    // If the left branch has cardinality one, we do not need to rewrite the unary pipeline
    // into a cartesian product.
    ILogicalOperator innerBranchOperator = opRef.getValue();
    ILogicalOperator boundaryOperator = boundaryOpRef.getValue();
    // Fixing ASTERIXDB-1620 will ensure correctness for external datasets.
    if (OperatorPropertiesUtil.isCardinalityZeroOrOne(boundaryOperator)
            && !descOrSelfIsLeafSourceScan(innerBranchOperator, boundaryOperator)) {
        return false;
    }
    // Join the two independent branches.
    InnerJoinOperator join = new InnerJoinOperator(new MutableObject<>(ConstantExpression.TRUE),
            new MutableObject<>(boundaryOperator), new MutableObject<>(innerBranchOperator));
    opRef.setValue(join);
    ILogicalOperator ets = new EmptyTupleSourceOperator();
    context.computeAndSetTypeEnvironmentForOperator(ets);
    boundaryOpRef.setValue(ets);
    context.computeAndSetTypeEnvironmentForOperator(boundaryOperator);
    context.computeAndSetTypeEnvironmentForOperator(innerBranchOperator);
    context.computeAndSetTypeEnvironmentForOperator(join);
    return true;
}
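opsAreIndependent is another private helper that this excerpt does not show. Based on the comment above ("op does not use any variables produced by op2"), a rough sketch of such a check might look like the following; this is an assumption about its semantics, not the rule's actual code.

// Rough sketch of an independence check (assumed semantics: the upper operator must not use any
// variable that is live at, i.e. produced by or below, the lower operator). Not the actual helper.
private static boolean areIndependent(ILogicalOperator upper, ILogicalOperator lower) throws AlgebricksException {
    List<LogicalVariable> usedByUpper = new ArrayList<>();
    VariableUtilities.getUsedVariables(upper, usedByUpper);
    List<LogicalVariable> liveAtLower = new ArrayList<>();
    VariableUtilities.getLiveVariables(lower, liveAtLower);
    for (LogicalVariable var : usedByUpper) {
        if (liveAtLower.contains(var)) {
            return false;
        }
    }
    return true;
}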
Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator in project asterixdb by Apache.
The class NestedSubplanToJoinRule, method rewriteNestedTupleSource.
/**
 * Rewrites NestedTupleSource operators to EmptyTupleSource operators.
 *
 * @param nestedRootRef the root of the nested plan to rewrite
 */
private void rewriteNestedTupleSource(Mutable<ILogicalOperator> nestedRootRef, IOptimizationContext context)
        throws AlgebricksException {
    AbstractLogicalOperator nestedRoot = (AbstractLogicalOperator) nestedRootRef.getValue();
    if (nestedRoot.getOperatorTag() == LogicalOperatorTag.NESTEDTUPLESOURCE) {
        ILogicalOperator ets = new EmptyTupleSourceOperator();
        nestedRootRef.setValue(ets);
        context.computeAndSetTypeEnvironmentForOperator(ets);
    }
    List<Mutable<ILogicalOperator>> inputs = nestedRoot.getInputs();
    for (Mutable<ILogicalOperator> input : inputs) {
        rewriteNestedTupleSource(input, context);
    }
}
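The surrounding NestedSubplanToJoinRule logic is not part of this excerpt. As an illustration only, a caller could apply this rewrite to every root of an operator's nested plans roughly as sketched below; the helper name is hypothetical, while the nested-plan accessors are the standard Algebricks ones.

// Illustrative only: applies the rewrite above to each nested-plan root of an operator with
// nested plans (e.g. a subplan), assuming the standard Algebricks nested-plan accessors.
private void rewriteAllNestedTupleSources(AbstractOperatorWithNestedPlans opWithNestedPlans,
        IOptimizationContext context) throws AlgebricksException {
    for (ILogicalPlan nestedPlan : opWithNestedPlans.getNestedPlans()) {
        for (Mutable<ILogicalOperator> rootRef : nestedPlan.getRoots()) {
            rewriteNestedTupleSource(rootRef, context);
        }
    }
}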