Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator in project asterixdb by apache.
The class IntroduceGroupByForSubplanRule, method rewritePost.
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator op0 = (AbstractLogicalOperator) opRef.getValue();
    if (op0.getOperatorTag() != LogicalOperatorTag.SUBPLAN) {
        return false;
    }
    SubplanOperator subplan = (SubplanOperator) op0;
    Iterator<ILogicalPlan> plansIter = subplan.getNestedPlans().iterator();
    ILogicalPlan p = null;
    while (plansIter.hasNext()) {
        p = plansIter.next();
    }
    if (p == null) {
        return false;
    }
    if (p.getRoots().size() != 1) {
        return false;
    }
    Mutable<ILogicalOperator> subplanRoot = p.getRoots().get(0);
    AbstractLogicalOperator op1 = (AbstractLogicalOperator) subplanRoot.getValue();
    Mutable<ILogicalOperator> botRef = subplanRoot;
    AbstractLogicalOperator op2;
    // Project is optional
    if (op1.getOperatorTag() != LogicalOperatorTag.PROJECT) {
        op2 = op1;
    } else {
        ProjectOperator project = (ProjectOperator) op1;
        botRef = project.getInputs().get(0);
        op2 = (AbstractLogicalOperator) botRef.getValue();
    }
    if (op2.getOperatorTag() != LogicalOperatorTag.AGGREGATE) {
        return false;
    }
    AggregateOperator aggregate = (AggregateOperator) op2;
    Set<LogicalVariable> free = new HashSet<LogicalVariable>();
    VariableUtilities.getUsedVariables(aggregate, free);
    Mutable<ILogicalOperator> op3Ref = aggregate.getInputs().get(0);
    AbstractLogicalOperator op3 = (AbstractLogicalOperator) op3Ref.getValue();
    while (op3.getInputs().size() == 1) {
        Set<LogicalVariable> prod = new HashSet<LogicalVariable>();
        VariableUtilities.getProducedVariables(op3, prod);
        free.removeAll(prod);
        VariableUtilities.getUsedVariables(op3, free);
        botRef = op3Ref;
        op3Ref = op3.getInputs().get(0);
        op3 = (AbstractLogicalOperator) op3Ref.getValue();
    }
    if (op3.getOperatorTag() != LogicalOperatorTag.INNERJOIN
            && op3.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
        return false;
    }
    AbstractBinaryJoinOperator join = (AbstractBinaryJoinOperator) op3;
    if (join.getCondition().getValue() == ConstantExpression.TRUE) {
        return false;
    }
    VariableUtilities.getUsedVariables(join, free);
    AbstractLogicalOperator b0 = (AbstractLogicalOperator) join.getInputs().get(0).getValue();
    // see if there's an NTS at the end of the pipeline
    NestedTupleSourceOperator outerNts = getNts(b0);
    if (outerNts == null) {
        AbstractLogicalOperator b1 = (AbstractLogicalOperator) join.getInputs().get(1).getValue();
        outerNts = getNts(b1);
        if (outerNts == null) {
            return false;
        }
    }
    Set<LogicalVariable> pkVars = computeGbyVars(outerNts, free, context);
    if (pkVars == null || pkVars.size() < 1) {
        // There is no non-trivial primary key; the group-by keys are all live variables
        // that were produced by a descendant or by the operator itself.
        ILogicalOperator subplanInput = subplan.getInputs().get(0).getValue();
        pkVars = new HashSet<LogicalVariable>();
        // get live variables
        VariableUtilities.getLiveVariables(subplanInput, pkVars);
        // get produced variables
        Set<LogicalVariable> producedVars = new HashSet<LogicalVariable>();
        VariableUtilities.getProducedVariablesInDescendantsAndSelf(subplanInput, producedVars);
        // retain the intersection
        pkVars.retainAll(producedVars);
    }
    AlgebricksConfig.ALGEBRICKS_LOGGER.fine("Found FD for introducing group-by: " + pkVars);
    Mutable<ILogicalOperator> rightRef = join.getInputs().get(1);
    LogicalVariable testForNull = null;
    AbstractLogicalOperator right = (AbstractLogicalOperator) rightRef.getValue();
    switch (right.getOperatorTag()) {
        case UNNEST: {
            UnnestOperator innerUnnest = (UnnestOperator) right;
            // Select [ $y != null ]
            testForNull = innerUnnest.getVariable();
            break;
        }
        case RUNNINGAGGREGATE: {
            ILogicalOperator inputToRunningAggregate = right.getInputs().get(0).getValue();
            Set<LogicalVariable> producedVars = new ListSet<LogicalVariable>();
            VariableUtilities.getProducedVariables(inputToRunningAggregate, producedVars);
            if (!producedVars.isEmpty()) {
                // Select [ $y != null ]
                testForNull = producedVars.iterator().next();
            }
            break;
        }
        case DATASOURCESCAN: {
            DataSourceScanOperator innerScan = (DataSourceScanOperator) right;
            // Select [ $y != null ]
            if (innerScan.getVariables().size() == 1) {
                testForNull = innerScan.getVariables().get(0);
            }
            break;
        }
        default:
            break;
    }
    if (testForNull == null) {
        testForNull = context.newVar();
        AssignOperator tmpAsgn = new AssignOperator(testForNull, new MutableObject<ILogicalExpression>(ConstantExpression.TRUE));
        tmpAsgn.getInputs().add(new MutableObject<ILogicalOperator>(rightRef.getValue()));
        rightRef.setValue(tmpAsgn);
        context.computeAndSetTypeEnvironmentForOperator(tmpAsgn);
    }
    IFunctionInfo finfoEq = context.getMetadataProvider().lookupFunction(AlgebricksBuiltinFunctions.IS_MISSING);
    ILogicalExpression isNullTest = new ScalarFunctionCallExpression(finfoEq,
            new MutableObject<ILogicalExpression>(new VariableReferenceExpression(testForNull)));
    IFunctionInfo finfoNot = context.getMetadataProvider().lookupFunction(AlgebricksBuiltinFunctions.NOT);
    ScalarFunctionCallExpression nonNullTest = new ScalarFunctionCallExpression(finfoNot,
            new MutableObject<ILogicalExpression>(isNullTest));
    SelectOperator selectNonNull = new SelectOperator(new MutableObject<ILogicalExpression>(nonNullTest), false, null);
    GroupByOperator g = new GroupByOperator();
    Mutable<ILogicalOperator> newSubplanRef = new MutableObject<ILogicalOperator>(subplan);
    NestedTupleSourceOperator nts = new NestedTupleSourceOperator(new MutableObject<ILogicalOperator>(g));
    opRef.setValue(g);
    selectNonNull.getInputs().add(new MutableObject<ILogicalOperator>(nts));
    List<Mutable<ILogicalOperator>> prodInpList = botRef.getValue().getInputs();
    prodInpList.clear();
    prodInpList.add(new MutableObject<ILogicalOperator>(selectNonNull));
    ILogicalPlan gPlan = new ALogicalPlanImpl(new MutableObject<ILogicalOperator>(subplanRoot.getValue()));
    g.getNestedPlans().add(gPlan);
    subplanRoot.setValue(op3Ref.getValue());
    g.getInputs().add(newSubplanRef);
    HashSet<LogicalVariable> underVars = new HashSet<LogicalVariable>();
    VariableUtilities.getLiveVariables(subplan.getInputs().get(0).getValue(), underVars);
    underVars.removeAll(pkVars);
    Map<LogicalVariable, LogicalVariable> mappedVars = buildVarExprList(pkVars, context, g, g.getGroupByList());
    context.updatePrimaryKeys(mappedVars);
    for (LogicalVariable uv : underVars) {
        g.getDecorList().add(new Pair<LogicalVariable, Mutable<ILogicalExpression>>(null,
                new MutableObject<ILogicalExpression>(new VariableReferenceExpression(uv))));
    }
    OperatorPropertiesUtil.typeOpRec(subplanRoot, context);
    OperatorPropertiesUtil.typeOpRec(gPlan.getRoots().get(0), context);
    context.computeAndSetTypeEnvironmentForOperator(g);
    return true;
}
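The method above relies on two private helpers, getNts(...) and computeGbyVars(...), that are not shown in this excerpt. Purely as orientation, a minimal sketch of what a getNts-style helper plausibly does is given below: it walks down a single-input pipeline and returns the NestedTupleSourceOperator at its end, or null when the pipeline branches or ends in a different leaf. This is an illustrative sketch under that assumption, not the actual asterixdb implementation.

// Illustrative sketch only: walk a single-input pipeline looking for an NTS at its end.
private NestedTupleSourceOperator getNts(AbstractLogicalOperator op) {
    AbstractLogicalOperator current = op;
    while (true) {
        if (current.getOperatorTag() == LogicalOperatorTag.NESTEDTUPLESOURCE) {
            return (NestedTupleSourceOperator) current;
        }
        if (current.getInputs().size() != 1) {
            // branching operator (e.g. a join) or a leaf that is not an NTS
            return null;
        }
        current = (AbstractLogicalOperator) current.getInputs().get(0).getValue();
    }
}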
Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator in project asterixdb by apache.
The class IntroduceRandomPartitioningFeedComputationRule, method rewritePre.
@Override
public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    ILogicalOperator op = opRef.getValue();
    if (!op.getOperatorTag().equals(LogicalOperatorTag.ASSIGN)) {
        return false;
    }
    ILogicalOperator opChild = op.getInputs().get(0).getValue();
    if (!opChild.getOperatorTag().equals(LogicalOperatorTag.DATASOURCESCAN)) {
        return false;
    }
    DataSourceScanOperator scanOp = (DataSourceScanOperator) opChild;
    DataSource dataSource = (DataSource) scanOp.getDataSource();
    if (dataSource.getDatasourceType() != DataSource.Type.FEED) {
        return false;
    }
    final FeedDataSource feedDataSource = (FeedDataSource) dataSource;
    FeedConnection feedConnection = feedDataSource.getFeedConnection();
    if (feedConnection.getAppliedFunctions() == null || feedConnection.getAppliedFunctions().size() == 0) {
        return false;
    }
    ExchangeOperator exchangeOp = new ExchangeOperator();
    INodeDomain domain = new INodeDomain() {

        @Override
        public boolean sameAs(INodeDomain domain) {
            return domain == this;
        }

        @Override
        public Integer cardinality() {
            return feedDataSource.getComputeCardinality();
        }
    };
    exchangeOp.setPhysicalOperator(new RandomPartitionExchangePOperator(domain));
    op.getInputs().get(0).setValue(exchangeOp);
    exchangeOp.getInputs().add(new MutableObject<ILogicalOperator>(scanOp));
    ExecutionMode em = ((AbstractLogicalOperator) scanOp).getExecutionMode();
    exchangeOp.setExecutionMode(em);
    exchangeOp.computeDeliveredPhysicalProperties(context);
    context.computeAndSetTypeEnvironmentForOperator(exchangeOp);
    AssignOperator assignOp = (AssignOperator) opRef.getValue();
    AssignPOperator assignPhyOp = (AssignPOperator) assignOp.getPhysicalOperator();
    assignPhyOp.setCardinalityConstraint(domain.cardinality());
    return true;
}
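The plan surgery in the middle of this rule follows a general Algebricks pattern: splice a new operator between a parent and its child by redirecting the parent's input reference and making the old child the new operator's input. A condensed sketch of that pattern, using only calls that appear in the snippet above (the variable names parentOp and context are assumed to be in scope):

// Sketch of the splice pattern used above; variable names are illustrative.
Mutable<ILogicalOperator> childRef = parentOp.getInputs().get(0);
ILogicalOperator oldChild = childRef.getValue();
ExchangeOperator exchange = new ExchangeOperator();
exchange.getInputs().add(new MutableObject<ILogicalOperator>(oldChild));
childRef.setValue(exchange);
// Keep the type environment consistent after the structural change.
context.computeAndSetTypeEnvironmentForOperator(exchange);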
Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator in project asterixdb by apache.
The class PushFieldAccessRule, method isAccessToIndexedField.
@SuppressWarnings("unchecked")
private boolean isAccessToIndexedField(AssignOperator assign, IOptimizationContext context) throws AlgebricksException {
    AbstractFunctionCallExpression accessFun = (AbstractFunctionCallExpression) assign.getExpressions().get(0).getValue();
    ILogicalExpression e0 = accessFun.getArguments().get(0).getValue();
    if (e0.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
        return false;
    }
    LogicalVariable var = ((VariableReferenceExpression) e0).getVariableReference();
    if (context.findPrimaryKey(var) == null) {
        // not referring to a dataset record
        return false;
    }
    AbstractLogicalOperator op = assign;
    while (op.getInputs().size() == 1 && op.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN) {
        op = (AbstractLogicalOperator) op.getInputs().get(0).getValue();
    }
    if (op.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN) {
        return false;
    }
    DataSourceScanOperator scan = (DataSourceScanOperator) op;
    LogicalVariable recVar = scan.getVariables().get(scan.getVariables().size() - 1);
    if (recVar != var) {
        return false;
    }
    MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
    DataSourceId asid = ((IDataSource<DataSourceId>) scan.getDataSource()).getId();
    Dataset dataset = mp.findDataset(asid.getDataverseName(), asid.getDatasourceName());
    if (dataset == null) {
        throw new AlgebricksException("Dataset " + asid.getDatasourceName() + " not found.");
    }
    if (dataset.getDatasetType() != DatasetType.INTERNAL) {
        return false;
    }
    final Integer pos = ConstantExpressionUtil.getIntConstant(accessFun.getArguments().get(1).getValue());
    if (pos != null) {
        String tName = dataset.getItemTypeName();
        IAType t = mp.findType(dataset.getItemTypeDataverseName(), tName);
        if (t.getTypeTag() != ATypeTag.OBJECT) {
            return false;
        }
        ARecordType rt = (ARecordType) t;
        if (pos >= rt.getFieldNames().length) {
            return false;
        }
    }
    List<Index> datasetIndexes = mp.getDatasetIndexes(dataset.getDataverseName(), dataset.getDatasetName());
    boolean hasSecondaryIndex = false;
    for (Index index : datasetIndexes) {
        if (index.isSecondaryIndex()) {
            hasSecondaryIndex = true;
            break;
        }
    }
    return hasSecondaryIndex;
}
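The final loop simply asks whether the dataset has at least one secondary index. If that check were needed elsewhere, it could be factored into a small helper; the sketch below uses only the metadata calls that already appear in the snippet, and the helper name itself is made up for illustration:

// Hypothetical helper; same logic as the loop above.
private static boolean hasSecondaryIndex(MetadataProvider mp, Dataset dataset) throws AlgebricksException {
    for (Index index : mp.getDatasetIndexes(dataset.getDataverseName(), dataset.getDatasetName())) {
        if (index.isSecondaryIndex()) {
            return true;
        }
    }
    return false;
}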
Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator in project asterixdb by apache.
The class IntroduceAutogenerateIDRule, method rewritePost.
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    // match: commit OR distribute-result OR SINK - ... followed by:
    // [insert to internal dataset with autogenerated id] - assign - project
    // produce: insert - assign - assign* - project
    // **
    // OR [insert to internal dataset with autogenerated id] - assign - [datasource scan]
    // produce: insert - assign - assign* - datasource scan
    AbstractLogicalOperator currentOp = (AbstractLogicalOperator) opRef.getValue();
    if (currentOp.getOperatorTag() == LogicalOperatorTag.DELEGATE_OPERATOR) {
        DelegateOperator dOp = (DelegateOperator) currentOp;
        if (!(dOp.getDelegate() instanceof CommitOperator)) {
            return false;
        } else if (!((CommitOperator) dOp.getDelegate()).isSink()) {
            return false;
        }
    } else if (currentOp.getOperatorTag() != LogicalOperatorTag.DISTRIBUTE_RESULT
            && currentOp.getOperatorTag() != LogicalOperatorTag.SINK) {
        return false;
    }
    ArrayDeque<AbstractLogicalOperator> opStack = new ArrayDeque<>();
    opStack.push(currentOp);
    while (currentOp.getInputs().size() == 1) {
        currentOp = (AbstractLogicalOperator) currentOp.getInputs().get(0).getValue();
        if (currentOp.getOperatorTag() == LogicalOperatorTag.INSERT_DELETE_UPSERT) {
            break;
        }
        opStack.push(currentOp);
    }
    if (currentOp.getOperatorTag() != LogicalOperatorTag.INSERT_DELETE_UPSERT) {
        return false;
    }
    InsertDeleteUpsertOperator insertOp = (InsertDeleteUpsertOperator) currentOp;
    if (insertOp.getOperation() != Kind.INSERT && insertOp.getOperation() != Kind.UPSERT) {
        return false;
    }
    DatasetDataSource dds = (DatasetDataSource) insertOp.getDataSource();
    boolean autogenerated = ((InternalDatasetDetails) dds.getDataset().getDatasetDetails()).isAutogenerated();
    if (!autogenerated) {
        return false;
    }
    if (((DataSource) insertOp.getDataSource()).getDatasourceType() != Type.INTERNAL_DATASET) {
        return false;
    }
    AbstractLogicalOperator parentOp = (AbstractLogicalOperator) currentOp.getInputs().get(0).getValue();
    if (parentOp.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
        return false;
    }
    AssignOperator assignOp = (AssignOperator) parentOp;
    LogicalVariable inputRecord;
    // TODO: bug here. Will not work for internal datasets with filters since the pattern becomes
    // [project-assign-assign-insert]
    AbstractLogicalOperator grandparentOp = (AbstractLogicalOperator) parentOp.getInputs().get(0).getValue();
    if (grandparentOp.getOperatorTag() == LogicalOperatorTag.PROJECT) {
        ProjectOperator projectOp = (ProjectOperator) grandparentOp;
        inputRecord = projectOp.getVariables().get(0);
    } else if (grandparentOp.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
        DataSourceScanOperator dssOp = (DataSourceScanOperator) grandparentOp;
        inputRecord = dssOp.getVariables().get(0);
    } else {
        return false;
    }
    List<String> pkFieldName = ((InternalDatasetDetails) dds.getDataset().getDatasetDetails()).getPrimaryKey().get(0);
    ILogicalExpression rec0 = new VariableReferenceExpression(inputRecord);
    ILogicalExpression rec1 = createPrimaryKeyRecordExpression(pkFieldName);
    ILogicalExpression mergedRec = createRecordMergeFunction(rec0, rec1);
    ILogicalExpression nonNullMergedRec = createNotNullFunction(mergedRec);
    LogicalVariable v = context.newVar();
    AssignOperator newAssign = new AssignOperator(v, new MutableObject<ILogicalExpression>(nonNullMergedRec));
    newAssign.getInputs().add(new MutableObject<ILogicalOperator>(grandparentOp));
    assignOp.getInputs().set(0, new MutableObject<ILogicalOperator>(newAssign));
    VariableUtilities.substituteVariables(assignOp, inputRecord, v, context);
    VariableUtilities.substituteVariables(insertOp, inputRecord, v, context);
    context.computeAndSetTypeEnvironmentForOperator(newAssign);
    context.computeAndSetTypeEnvironmentForOperator(assignOp);
    context.computeAndSetTypeEnvironmentForOperator(insertOp);
    for (AbstractLogicalOperator op : opStack) {
        VariableUtilities.substituteVariables(op, inputRecord, v, context);
        context.computeAndSetTypeEnvironmentForOperator(op);
    }
    return true;
}
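The helpers createPrimaryKeyRecordExpression, createRecordMergeFunction, and createNotNullFunction are private to the rule and not shown in this excerpt. Purely as a hedged sketch, a record-merge expression could be assembled the same way the first snippet builds scalar function calls; note that FunctionUtil.getFunctionInfo and BuiltinFunctions.RECORD_MERGE below are assumptions about asterixdb's builtin-function registry, not a confirmed reading of this rule:

// Assumption-laden sketch: builds a record-merge(rec0, rec1) scalar function call.
// BuiltinFunctions.RECORD_MERGE and FunctionUtil.getFunctionInfo are assumed APIs here.
private ILogicalExpression createRecordMergeFunction(ILogicalExpression rec0, ILogicalExpression rec1) {
    return new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.RECORD_MERGE),
            new MutableObject<ILogicalExpression>(rec0), new MutableObject<ILogicalExpression>(rec1));
}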
Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator in project asterixdb by apache.
The class MetaKeyExpressionReferenceTransform, method visit.
private ILogicalExpressionReferenceTransformWithCondition visit(Mutable<ILogicalOperator> opRef) throws AlgebricksException {
    ILogicalOperator op = opRef.getValue();
    // Reaches NTS or ETS.
    if (op.getInputs().size() == 0) {
        return NoOpExpressionReferenceTransform.INSTANCE;
    }
    // A data-source scan returns a useful transform if the meta part is present in the dataset.
    if (op.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
        DataSourceScanOperator scanOp = (DataSourceScanOperator) op;
        ILogicalExpressionReferenceTransformWithCondition inputTransfomer = visit(op.getInputs().get(0));
        DataSource dataSource = (DataSource) scanOp.getDataSource();
        List<ILogicalExpressionReferenceTransformWithCondition> transformers = null;
        List<LogicalVariable> allVars = scanOp.getVariables();
        LogicalVariable dataVar = dataSource.getDataRecordVariable(allVars);
        LogicalVariable metaVar = dataSource.getMetaVariable(allVars);
        LogicalExpressionReferenceTransform currentTransformer = null;
        // https://issues.apache.org/jira/browse/ASTERIXDB-1618
        if (dataSource.getDatasourceType() != DataSource.Type.EXTERNAL_DATASET
                && dataSource.getDatasourceType() != DataSource.Type.INTERNAL_DATASET
                && dataSource.getDatasourceType() != DataSource.Type.LOADABLE) {
            IMutationDataSource mds = (IMutationDataSource) dataSource;
            if (mds.isChange()) {
                transformers = new ArrayList<>();
                transformers.add(new MetaKeyExpressionReferenceTransform(mds.getPkVars(allVars), mds.getKeyAccessExpression()));
            } else if (metaVar != null) {
                transformers = new ArrayList<>();
                transformers.add(new MetaKeyToFieldAccessTransform(metaVar));
            }
        }
        if (!dataSource.hasMeta() && transformers == null) {
            return inputTransfomer;
        }
        if (metaVar != null) {
            currentTransformer = new LogicalExpressionReferenceTransform(dataVar, metaVar);
        }
        if (inputTransfomer.equals(NoOpExpressionReferenceTransform.INSTANCE) && transformers == null) {
            return currentTransformer;
        } else if (inputTransfomer.equals(NoOpExpressionReferenceTransform.INSTANCE) && currentTransformer == null) {
            return transformers.get(0);
        } else {
            // Requires an argument variable to resolve ambiguity.
            if (transformers == null) {
                transformers = new ArrayList<>();
            }
            if (!inputTransfomer.equals(NoOpExpressionReferenceTransform.INSTANCE)) {
                inputTransfomer.setVariableRequired();
                transformers.add(inputTransfomer);
            }
            currentTransformer.setVariableRequired();
            transformers.add(currentTransformer);
            return new CompositeExpressionReferenceTransform(transformers);
        }
    }
    // Visits children in depth-first order.
    List<ILogicalExpressionReferenceTransformWithCondition> transformers = new ArrayList<>();
    for (Mutable<ILogicalOperator> childRef : op.getInputs()) {
        ILogicalExpressionReferenceTransformWithCondition transformer = visit(childRef);
        if (!transformer.equals(NoOpExpressionReferenceTransform.INSTANCE)) {
            transformers.add(transformer);
        }
    }
    ILogicalExpressionReferenceTransformWithCondition currentTransformer = null;
    if (transformers.size() == 0) {
        currentTransformer = NoOpExpressionReferenceTransform.INSTANCE;
    } else if (transformers.size() == 1) {
        currentTransformer = transformers.get(0);
    } else {
        // Transformers in a CompositeTransformer require the argument-variable check.
        for (ILogicalExpressionReferenceTransformWithCondition transformer : transformers) {
            transformer.setVariableRequired();
        }
        currentTransformer = new CompositeExpressionReferenceTransform(transformers);
    }
    rewritten |= op.acceptExpressionTransform(currentTransformer);
    return currentTransformer;
}
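The tail of this method shows the usual Algebricks traversal idiom: recurse into the operator's inputs first, then hand the chosen transform to the operator via acceptExpressionTransform and accumulate whether anything changed. Stripped of the meta-variable bookkeeping, the idiom looks roughly like the sketch below; the method name and the simplified single-transform signature are illustrative, not part of the rule.

// Illustrative sketch of the depth-first transform driver; not the actual rule code.
private boolean applyTransformRecursively(Mutable<ILogicalOperator> opRef,
        ILogicalExpressionReferenceTransform transform) throws AlgebricksException {
    boolean changed = false;
    for (Mutable<ILogicalOperator> childRef : opRef.getValue().getInputs()) {
        changed |= applyTransformRecursively(childRef, transform);
    }
    // Let the operator rewrite the expressions it owns.
    changed |= opRef.getValue().acceptExpressionTransform(transform);
    return changed;
}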