Use of org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain in project asterixdb by apache.
The class LangExpressionToPlanTranslator, method validateDatasetInfo.
private DatasetDataSource validateDatasetInfo(MetadataProvider metadataProvider, String dataverseName,
        String datasetName) throws AlgebricksException {
    Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
    if (dataset == null) {
        throw new AlgebricksException("Cannot find dataset " + datasetName + " in dataverse " + dataverseName);
    }
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        throw new AlgebricksException("Cannot write output to an external dataset.");
    }
    DataSourceId sourceId = new DataSourceId(dataverseName, datasetName);
    IAType itemType = metadataProvider.findType(dataset.getItemTypeDataverseName(), dataset.getItemTypeName());
    IAType metaItemType = metadataProvider.findType(dataset.getMetaItemTypeDataverseName(),
            dataset.getMetaItemTypeName());
    // The dataset's node group determines the INodeDomain, i.e. the set of nodes
    // across which the data source is partitioned.
    INodeDomain domain = metadataProvider.findNodeDomain(dataset.getNodeGroupName());
    return new DatasetDataSource(sourceId, dataset, itemType, metaItemType, DataSource.Type.INTERNAL_DATASET,
            dataset.getDatasetDetails(), domain);
}
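The INodeDomain contract is small: sameAs decides whether two domains describe the same set of nodes, and cardinality reports how many nodes there are (or null if unknown), as the anonymous implementation further below shows. As a hedged sketch of what a node-group-backed domain could look like (the class name and fields are illustrative, not asterixdb code):

import org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain;

// Illustrative sketch only: a domain identified by its node-group name.
class NodeGroupDomain implements INodeDomain {
    private final String nodeGroupName; // hypothetical identifier
    private final int numNodes;

    NodeGroupDomain(String nodeGroupName, int numNodes) {
        this.nodeGroupName = nodeGroupName;
        this.numNodes = numNodes;
    }

    @Override
    public boolean sameAs(INodeDomain domain) {
        // Two domains are interchangeable when they name the same node group.
        return domain instanceof NodeGroupDomain
                && ((NodeGroupDomain) domain).nodeGroupName.equals(nodeGroupName);
    }

    @Override
    public Integer cardinality() {
        return numNodes; // number of nodes the data is spread across
    }
}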
Use of org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain in project asterixdb by apache.
The class EnforceStructuralPropertiesRule, method physOptimizeOp.
private boolean physOptimizeOp(Mutable<ILogicalOperator> opRef, IPhysicalPropertiesVector required,
        boolean nestedPlan, IOptimizationContext context) throws AlgebricksException {
    boolean changed = false;
    AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
    optimizeUsingConstraintsAndEquivClasses(op);
    PhysicalRequirements pr = op.getRequiredPhysicalPropertiesForChildren(required, context);
    IPhysicalPropertiesVector[] reqdProperties = null;
    if (pr != null) {
        reqdProperties = pr.getRequiredProperties();
    }
    boolean opIsRedundantSort = false;
    // Compute delivered properties and figure out the common domain of the children.
    INodeDomain childrenDomain = null;
    {
        int j = 0;
        for (Mutable<ILogicalOperator> childRef : op.getInputs()) {
            AbstractLogicalOperator child = (AbstractLogicalOperator) childRef.getValue();
            // Recursive call.
            if (physOptimizeOp(childRef, reqdProperties[j], nestedPlan, context)) {
                changed = true;
            }
            child.computeDeliveredPhysicalProperties(context);
            IPhysicalPropertiesVector delivered = child.getDeliveredPhysicalProperties();
            if (childrenDomain == null) {
                childrenDomain = delivered.getPartitioningProperty().getNodeDomain();
            } else {
                INodeDomain dom2 = delivered.getPartitioningProperty().getNodeDomain();
                if (!childrenDomain.sameAs(dom2)) {
                    // The children deliver different domains; fall back to the
                    // cluster-wide computation domain.
                    childrenDomain = context.getComputationNodeDomain();
                }
            }
            j++;
        }
    }
    if (reqdProperties != null) {
        for (int k = 0; k < reqdProperties.length; k++) {
            IPhysicalPropertiesVector pv = reqdProperties[k];
            IPartitioningProperty pp = pv.getPartitioningProperty();
            if (pp != null && pp.getNodeDomain() == null) {
                pp.setNodeDomain(childrenDomain);
            }
        }
    }
    // The index of the child operator to optimize first.
    int startChildIndex = getStartChildIndex(op, pr, nestedPlan, context);
    IPartitioningProperty firstDeliveredPartitioning = null;
    // Enforce data properties in a top-down manner.
    for (int j = 0; j < op.getInputs().size(); j++) {
        // Start from a partitioning-compatible child, if any, and loop over all children.
        int childIndex = (j + startChildIndex) % op.getInputs().size();
        IPhysicalPropertiesVector requiredProperty = reqdProperties[childIndex];
        AbstractLogicalOperator child = (AbstractLogicalOperator) op.getInputs().get(childIndex).getValue();
        IPhysicalPropertiesVector delivered = child.getDeliveredPhysicalProperties();
        AlgebricksConfig.ALGEBRICKS_LOGGER
                .finest(">>>> Properties delivered by " + child.getPhysicalOperator() + ": " + delivered + "\n");
        IPartitioningRequirementsCoordinator prc = pr.getPartitioningCoordinator();
        // Coordinate requirements by looking at the firstDeliveredPartitioning.
        Pair<Boolean, IPartitioningProperty> pbpp = prc.coordinateRequirements(
                requiredProperty.getPartitioningProperty(), firstDeliveredPartitioning, op, context);
        boolean mayExpandPartitioningProperties = pbpp.first;
        IPhysicalPropertiesVector rqd =
                new StructuralPropertiesVector(pbpp.second, requiredProperty.getLocalProperties());
        AlgebricksConfig.ALGEBRICKS_LOGGER
                .finest(">>>> Required properties for " + child.getPhysicalOperator() + ": " + rqd + "\n");
        // The partitioning property of reqdProperties[childIndex] could be updated here because
        // rqd.getPartitioningProperty() is the same object instance as requiredProperty.getPartitioningProperty().
        IPhysicalPropertiesVector diff = delivered.getUnsatisfiedPropertiesFrom(rqd,
                mayExpandPartitioningProperties, context.getEquivalenceClassMap(child), context.getFDList(child));
        if (isRedundantSort(opRef, delivered, diff, context)) {
            opIsRedundantSort = true;
        }
        if (diff != null) {
            changed = true;
            addEnforcers(op, childIndex, diff, rqd, delivered, childrenDomain, nestedPlan, context);
            AbstractLogicalOperator newChild = (AbstractLogicalOperator) op.getInputs().get(childIndex).getValue();
            if (newChild != child) {
                delivered = newChild.getDeliveredPhysicalProperties();
                IPhysicalPropertiesVector newDiff =
                        newPropertiesDiff(newChild, rqd, mayExpandPartitioningProperties, context);
                AlgebricksConfig.ALGEBRICKS_LOGGER.finest(">>>> New properties diff: " + newDiff + "\n");
                if (isRedundantSort(opRef, delivered, newDiff, context)) {
                    opIsRedundantSort = true;
                    break;
                }
            }
        }
        if (firstDeliveredPartitioning == null) {
            firstDeliveredPartitioning = delivered.getPartitioningProperty();
        }
    }
    if (op.hasNestedPlans()) {
        AbstractOperatorWithNestedPlans nested = (AbstractOperatorWithNestedPlans) op;
        for (ILogicalPlan p : nested.getNestedPlans()) {
            if (physOptimizePlan(p, required, true, context)) {
                changed = true;
            }
        }
    }
    if (opIsRedundantSort) {
        if (AlgebricksConfig.DEBUG) {
            AlgebricksConfig.ALGEBRICKS_LOGGER
                    .fine(">>>> Removing redundant SORT operator " + op.getPhysicalOperator() + "\n");
            printOp(op);
        }
        changed = true;
        AbstractLogicalOperator nextOp = (AbstractLogicalOperator) op.getInputs().get(0).getValue();
        if (nextOp.getOperatorTag() == LogicalOperatorTag.PROJECT) {
            nextOp = (AbstractLogicalOperator) nextOp.getInputs().get(0).getValue();
        }
        opRef.setValue(nextOp);
        // Now, transfer annotations from the original sort op. to this one.
        AbstractLogicalOperator transferTo = nextOp;
        if (transferTo.getOperatorTag() == LogicalOperatorTag.EXCHANGE) {
            // Remove the duplicate exchange operator.
            transferTo = (AbstractLogicalOperator) transferTo.getInputs().get(0).getValue();
        }
        transferTo.getAnnotations().putAll(op.getAnnotations());
        physOptimizeOp(opRef, required, nestedPlan, context);
    }
    return changed;
}
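The domain-merging logic at the top of this method is worth isolating. A minimal sketch of the same rule, extracted into a standalone helper (illustrative only; asterixdb keeps this loop inline, and the imports of java.util.List and INodeDomain are assumed):

// Illustrative helper, not part of asterixdb: merge the domains delivered by
// an operator's children. If any two children disagree, the common domain
// degrades to the cluster-wide computation domain.
static INodeDomain mergeChildDomains(List<INodeDomain> deliveredDomains, INodeDomain computationDomain) {
    INodeDomain merged = null;
    for (INodeDomain d : deliveredDomains) {
        if (merged == null) {
            merged = d;
        } else if (!merged.sameAs(d)) {
            return computationDomain;
        }
    }
    return merged;
}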
Use of org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain in project asterixdb by apache.
The class SetAsterixPhysicalOperatorsRule, method computeDefaultPhysicalOp.
private static void computeDefaultPhysicalOp(AbstractLogicalOperator op, IOptimizationContext context)
        throws AlgebricksException {
    PhysicalOptimizationConfig physicalOptimizationConfig = context.getPhysicalOptimizationConfig();
    if (op.getOperatorTag().equals(LogicalOperatorTag.GROUP)) {
        GroupByOperator gby = (GroupByOperator) op;
        if (gby.getNestedPlans().size() == 1) {
            ILogicalPlan p0 = gby.getNestedPlans().get(0);
            if (p0.getRoots().size() == 1) {
                Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
                if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag()
                        .equals(LogicalOperatorTag.AGGREGATE)) {
                    AggregateOperator aggOp = (AggregateOperator) r0.getValue();
                    boolean serializable = true;
                    for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
                        AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) exprRef.getValue();
                        if (!BuiltinFunctions.isAggregateFunctionSerializable(expr.getFunctionIdentifier())) {
                            serializable = false;
                            break;
                        }
                    }
                    if (gby.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY) == Boolean.TRUE
                            || gby.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY) == Boolean.TRUE) {
                        boolean setToExternalGby = false;
                        if (serializable) {
                            // If serializable, use external group-by. First check whether each serialized
                            // aggregation function has a corresponding intermediate aggregate.
                            boolean hasIntermediateAgg = true;
                            IMergeAggregationExpressionFactory mergeAggregationExpressionFactory =
                                    context.getMergeAggregationExpressionFactory();
                            List<LogicalVariable> originalVariables = aggOp.getVariables();
                            List<Mutable<ILogicalExpression>> aggExprs = aggOp.getExpressions();
                            int aggNum = aggExprs.size();
                            for (int i = 0; i < aggNum; i++) {
                                AbstractFunctionCallExpression expr =
                                        (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
                                AggregateFunctionCallExpression serialAggExpr =
                                        BuiltinFunctions.makeSerializableAggregateFunctionExpression(
                                                expr.getFunctionIdentifier(), expr.getArguments());
                                if (mergeAggregationExpressionFactory.createMergeAggregation(
                                        originalVariables.get(i), serialAggExpr, context) == null) {
                                    hasIntermediateAgg = false;
                                    break;
                                }
                            }
                            // Check whether there are multiple aggregates in the sub plan.
                            // Currently, we don't support multiple aggregates in one external group-by.
                            boolean multipleAggOpsFound = false;
                            ILogicalOperator r1Logical = aggOp;
                            while (r1Logical.hasInputs()) {
                                r1Logical = r1Logical.getInputs().get(0).getValue();
                                if (r1Logical.getOperatorTag() == LogicalOperatorTag.AGGREGATE) {
                                    multipleAggOpsFound = true;
                                    break;
                                }
                            }
                            if (hasIntermediateAgg && !multipleAggOpsFound) {
                                for (int i = 0; i < aggNum; i++) {
                                    AbstractFunctionCallExpression expr =
                                            (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
                                    AggregateFunctionCallExpression serialAggExpr =
                                            BuiltinFunctions.makeSerializableAggregateFunctionExpression(
                                                    expr.getFunctionIdentifier(), expr.getArguments());
                                    aggOp.getExpressions().get(i).setValue(serialAggExpr);
                                }
                                ExternalGroupByPOperator externalGby = new ExternalGroupByPOperator(
                                        gby.getGroupByList(),
                                        physicalOptimizationConfig.getMaxFramesExternalGroupBy(),
                                        (long) physicalOptimizationConfig.getMaxFramesExternalGroupBy()
                                                * physicalOptimizationConfig.getFrameSize());
                                generateMergeAggregationExpressions(gby, context);
                                op.setPhysicalOperator(externalGby);
                                setToExternalGby = true;
                            }
                        }
                        if (!setToExternalGby) {
                            // If not serializable, or there is no intermediate agg, use pre-clustered group-by.
                            List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
                            List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
                            for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
                                ILogicalExpression expr = p.second.getValue();
                                if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
                                    VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
                                    columnList.add(varRef.getVariableReference());
                                }
                            }
                            op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
                        }
                    }
                } else if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag()
                        .equals(LogicalOperatorTag.RUNNINGAGGREGATE)) {
                    List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
                    List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
                    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
                        ILogicalExpression expr = p.second.getValue();
                        if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
                            VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
                            columnList.add(varRef.getVariableReference());
                        }
                    }
                    op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
                } else {
                    throw new AlgebricksException("Unsupported nested operator within a group-by: "
                            + ((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().name());
                }
            }
        }
    }
    if (op.getPhysicalOperator() == null) {
        switch (op.getOperatorTag()) {
            case INNERJOIN: {
                JoinUtils.setJoinAlgorithmAndExchangeAlgo((InnerJoinOperator) op, context);
                break;
            }
            case LEFTOUTERJOIN: {
                JoinUtils.setJoinAlgorithmAndExchangeAlgo((LeftOuterJoinOperator) op, context);
                break;
            }
            case UNNEST_MAP:
            case LEFT_OUTER_UNNEST_MAP: {
                ILogicalExpression unnestExpr = ((AbstractUnnestMapOperator) op).getExpressionRef().getValue();
                if (unnestExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
                    AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) unnestExpr;
                    FunctionIdentifier fid = f.getFunctionIdentifier();
                    if (!fid.equals(BuiltinFunctions.INDEX_SEARCH)) {
                        throw new IllegalStateException();
                    }
                    AccessMethodJobGenParams jobGenParams = new AccessMethodJobGenParams();
                    jobGenParams.readFromFuncArgs(f.getArguments());
                    MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
                    DataSourceId dataSourceId =
                            new DataSourceId(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
                    Dataset dataset = mp.findDataset(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
                    IDataSourceIndex<String, DataSourceId> dsi =
                            mp.findDataSourceIndex(jobGenParams.getIndexName(), dataSourceId);
                    // The storage domain is derived from the dataset's node group.
                    INodeDomain storageDomain = mp.findNodeDomain(dataset.getNodeGroupName());
                    if (dsi == null) {
                        throw new AlgebricksException("Could not find index " + jobGenParams.getIndexName()
                                + " for dataset " + dataSourceId);
                    }
                    IndexType indexType = jobGenParams.getIndexType();
                    boolean requiresBroadcast = jobGenParams.getRequiresBroadcast();
                    switch (indexType) {
                        case BTREE: {
                            BTreeJobGenParams btreeJobGenParams = new BTreeJobGenParams();
                            btreeJobGenParams.readFromFuncArgs(f.getArguments());
                            op.setPhysicalOperator(new BTreeSearchPOperator(dsi, storageDomain, requiresBroadcast,
                                    btreeJobGenParams.isPrimaryIndex(), btreeJobGenParams.isEqCondition(),
                                    btreeJobGenParams.getLowKeyVarList(), btreeJobGenParams.getHighKeyVarList()));
                            break;
                        }
                        case RTREE: {
                            op.setPhysicalOperator(new RTreeSearchPOperator(dsi, storageDomain, requiresBroadcast));
                            break;
                        }
                        case SINGLE_PARTITION_WORD_INVIX:
                        case SINGLE_PARTITION_NGRAM_INVIX: {
                            op.setPhysicalOperator(
                                    new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, false));
                            break;
                        }
                        case LENGTH_PARTITIONED_WORD_INVIX:
                        case LENGTH_PARTITIONED_NGRAM_INVIX: {
                            op.setPhysicalOperator(
                                    new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, true));
                            break;
                        }
                        default: {
                            throw new NotImplementedException(indexType + " indexes are not implemented.");
                        }
                    }
                }
                break;
            }
        }
    }
    if (op.hasNestedPlans()) {
        AbstractOperatorWithNestedPlans nested = (AbstractOperatorWithNestedPlans) op;
        for (ILogicalPlan p : nested.getNestedPlans()) {
            setPhysicalOperators(p, context);
        }
    }
    for (Mutable<ILogicalOperator> opRef : op.getInputs()) {
        computeDefaultPhysicalOp((AbstractLogicalOperator) opRef.getValue(), context);
    }
}
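The serializability check near the top of this method gates the external group-by path. Extracted as a standalone helper (a sketch only; asterixdb keeps the loop inline), it reads:

// Sketch of the inline check above: true iff every aggregate call in the
// operator has a serializable built-in counterpart, a precondition for
// rewriting the plan to use an external (spilling) group-by.
static boolean allAggregatesSerializable(AggregateOperator aggOp) {
    for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
        AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) exprRef.getValue();
        if (!BuiltinFunctions.isAggregateFunctionSerializable(expr.getFunctionIdentifier())) {
            return false;
        }
    }
    return true;
}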
Use of org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain in project asterixdb by apache.
The class DataSourceScanPOperator, method getRequiredPropertiesForChildren.
@Override
public PhysicalRequirements getRequiredPropertiesForChildren(ILogicalOperator op,
        IPhysicalPropertiesVector reqdByParent, IOptimizationContext context) {
    if (op.getInputs().isEmpty()) {
        return emptyUnaryRequirements();
    }
    ILogicalOperator childOp = op.getInputs().get(0).getValue();
    // Empty tuple source is a special case that can be partitioned in the same way as the data scan.
    if (childOp.getOperatorTag() == LogicalOperatorTag.EMPTYTUPLESOURCE) {
        return emptyUnaryRequirements();
    }
    INodeDomain domain = dataSource.getDomain();
    return new PhysicalRequirements(
            new StructuralPropertiesVector[] {
                    new StructuralPropertiesVector(new BroadcastPartitioningProperty(domain), null) },
            IPartitioningRequirementsCoordinator.NO_COORDINATION);
}
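The requirement built here ties the broadcast to the data source's own domain, so a later optimizer pass can test domain compatibility through sameAs. A hedged usage sketch (the variable names are illustrative, and it assumes only that BroadcastPartitioningProperty exposes getNodeDomain via IPartitioningProperty, as used in physOptimizeOp above):

// The broadcast requirement carries the scan's node domain, so a later pass
// can ask whether a delivered property already covers the same set of nodes.
INodeDomain scanDomain = dataSource.getDomain();
BroadcastPartitioningProperty bcast = new BroadcastPartitioningProperty(scanDomain);
assert bcast.getNodeDomain().sameAs(scanDomain);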
Use of org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain in project asterixdb by apache.
The class IntroduceRandomPartitioningFeedComputationRule, method rewritePre.
@Override
public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    ILogicalOperator op = opRef.getValue();
    if (!op.getOperatorTag().equals(LogicalOperatorTag.ASSIGN)) {
        return false;
    }
    ILogicalOperator opChild = op.getInputs().get(0).getValue();
    if (!opChild.getOperatorTag().equals(LogicalOperatorTag.DATASOURCESCAN)) {
        return false;
    }
    DataSourceScanOperator scanOp = (DataSourceScanOperator) opChild;
    DataSource dataSource = (DataSource) scanOp.getDataSource();
    if (dataSource.getDatasourceType() != DataSource.Type.FEED) {
        return false;
    }
    final FeedDataSource feedDataSource = (FeedDataSource) dataSource;
    FeedConnection feedConnection = feedDataSource.getFeedConnection();
    if (feedConnection.getAppliedFunctions() == null || feedConnection.getAppliedFunctions().isEmpty()) {
        return false;
    }
    ExchangeOperator exchangeOp = new ExchangeOperator();
    INodeDomain domain = new INodeDomain() {

        @Override
        public boolean sameAs(INodeDomain domain) {
            // Identity comparison: this ad-hoc domain matches nothing but itself.
            return domain == this;
        }

        @Override
        public Integer cardinality() {
            return feedDataSource.getComputeCardinality();
        }
    };
    exchangeOp.setPhysicalOperator(new RandomPartitionExchangePOperator(domain));
    op.getInputs().get(0).setValue(exchangeOp);
    exchangeOp.getInputs().add(new MutableObject<ILogicalOperator>(scanOp));
    ExecutionMode em = ((AbstractLogicalOperator) scanOp).getExecutionMode();
    exchangeOp.setExecutionMode(em);
    exchangeOp.computeDeliveredPhysicalProperties(context);
    context.computeAndSetTypeEnvironmentForOperator(exchangeOp);
    AssignOperator assignOp = (AssignOperator) opRef.getValue();
    AssignPOperator assignPhyOp = (AssignPOperator) assignOp.getPhysicalOperator();
    assignPhyOp.setCardinalityConstraint(domain.cardinality());
    return true;
}
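Note the identity-based sameAs above: every instance of this anonymous domain is unique, so the optimizer never treats two such feed-compute domains as co-partitioned. A hedged illustration of that semantics (makeFeedComputeDomain is a hypothetical factory wrapping the anonymous class above, not an asterixdb method):

// Hypothetical illustration of the identity semantics used above.
INodeDomain d1 = makeFeedComputeDomain(feedDataSource); // assumed factory around
INodeDomain d2 = makeFeedComputeDomain(feedDataSource); // the anonymous class
assert d1.sameAs(d1);   // a domain always matches itself
assert !d1.sameAs(d2);  // distinct instances never match, even for the same feed
// Plausible consequence: no other domain ever satisfies a requirement expressed
// over this one, so the random-partition exchange introduced by the rule is preserved.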