use of org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier in project asterixdb by apache.
the class SetAsterixPhysicalOperatorsRule method computeDefaultPhysicalOp.
/**
 * Assigns a default physical operator to {@code op} for the operator kinds handled here, then
 * continues through the operator's nested plans and inputs.
 *
 * <p>Handled cases:
 * <ul>
 * <li>GROUP with exactly one nested plan that has exactly one root: an AGGREGATE root may get an
 * external group-by or a pre-clustered group-by (only when a hash/external group-by annotation is
 * set to {@code Boolean.TRUE}); a RUNNINGAGGREGATE root gets a pre-clustered group-by; any other
 * root tag is rejected.</li>
 * <li>INNERJOIN / LEFTOUTERJOIN: delegated to {@code JoinUtils.setJoinAlgorithmAndExchangeAlgo}.</li>
 * <li>UNNEST_MAP / LEFT_OUTER_UNNEST_MAP whose expression is a function call: a search operator is
 * chosen from the index type read out of the function arguments.</li>
 * </ul>
 * Operators that match none of these cases keep whatever physical operator they already have.
 *
 * @param op the logical operator to process; its inputs are processed recursively
 * @param context optimization context supplying the physical optimization config, the merge
 *        aggregation expression factory and the metadata provider
 * @throws AlgebricksException if the single nested root of a group-by is neither AGGREGATE nor
 *         RUNNINGAGGREGATE, or if the index named in an unnest-map's arguments cannot be found
 */
private static void computeDefaultPhysicalOp(AbstractLogicalOperator op, IOptimizationContext context) throws AlgebricksException {
PhysicalOptimizationConfig physicalOptimizationConfig = context.getPhysicalOptimizationConfig();
if (op.getOperatorTag().equals(LogicalOperatorTag.GROUP)) {
GroupByOperator gby = (GroupByOperator) op;
// Only group-bys with a single nested plan that has a single root are considered here.
if (gby.getNestedPlans().size() == 1) {
ILogicalPlan p0 = gby.getNestedPlans().get(0);
if (p0.getRoots().size() == 1) {
Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().equals(LogicalOperatorTag.AGGREGATE)) {
AggregateOperator aggOp = (AggregateOperator) r0.getValue();
// "serializable" stays true only if every aggregate function in the nested aggregate is
// reported as serializable by BuiltinFunctions.
boolean serializable = true;
for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) exprRef.getValue();
if (!BuiltinFunctions.isAggregateFunctionSerializable(expr.getFunctionIdentifier())) {
serializable = false;
break;
}
}
// Without one of these annotations set to Boolean.TRUE, no physical operator is assigned to the
// group-by in this branch (the switch below has no GROUP case either).
if ((gby.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY) == Boolean.TRUE || gby.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY) == Boolean.TRUE)) {
boolean setToExternalGby = false;
if (serializable) {
// if serializable, use external group-by
// now check whether the serialized version aggregation function has corresponding intermediate agg
boolean hasIntermediateAgg = true;
IMergeAggregationExpressionFactory mergeAggregationExpressionFactory = context.getMergeAggregationExpressionFactory();
List<LogicalVariable> originalVariables = aggOp.getVariables();
List<Mutable<ILogicalExpression>> aggExprs = aggOp.getExpressions();
int aggNum = aggExprs.size();
// Dry run: build the serializable form of each aggregate and ask the factory for a merge
// aggregation; a null result means there is no intermediate aggregate for it.
for (int i = 0; i < aggNum; i++) {
AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
if (mergeAggregationExpressionFactory.createMergeAggregation(originalVariables.get(i), serialAggExpr, context) == null) {
hasIntermediateAgg = false;
break;
}
}
// Check whether there are multiple aggregates in the sub plan.
// Currently, we don't support multiple aggregates in one external group-by.
boolean multipleAggOpsFound = false;
ILogicalOperator r1Logical = aggOp;
// Walks down the first input of each operator only, starting below aggOp.
while (r1Logical.hasInputs()) {
r1Logical = r1Logical.getInputs().get(0).getValue();
if (r1Logical.getOperatorTag() == LogicalOperatorTag.AGGREGATE) {
multipleAggOpsFound = true;
break;
}
}
if (hasIntermediateAgg && !multipleAggOpsFound) {
// Commit: replace each aggregate expression in place with its serializable counterpart.
for (int i = 0; i < aggNum; i++) {
AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
aggOp.getExpressions().get(i).setValue(serialAggExpr);
}
// Third argument is (max frames * frame size); the cast to long happens before the multiplication.
ExternalGroupByPOperator externalGby = new ExternalGroupByPOperator(gby.getGroupByList(), physicalOptimizationConfig.getMaxFramesExternalGroupBy(), (long) physicalOptimizationConfig.getMaxFramesExternalGroupBy() * physicalOptimizationConfig.getFrameSize());
generateMergeAggregationExpressions(gby, context);
op.setPhysicalOperator(externalGby);
setToExternalGby = true;
}
}
if (!setToExternalGby) {
// if not serializable or no intermediate agg, use pre-clustered group-by
List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
// Only group-by expressions that are plain variable references contribute a column.
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
ILogicalExpression expr = p.second.getValue();
if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
columnList.add(varRef.getVariableReference());
}
}
op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
}
}
} else if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().equals(LogicalOperatorTag.RUNNINGAGGREGATE)) {
// A running aggregate always gets a pre-clustered group-by, built the same way as the fallback above.
List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
ILogicalExpression expr = p.second.getValue();
if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
columnList.add(varRef.getVariableReference());
}
}
op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
} else {
throw new AlgebricksException("Unsupported nested operator within a group-by: " + ((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().name());
}
}
}
}
// Defaults for operators that still have no physical operator after the group-by handling above.
if (op.getPhysicalOperator() == null) {
switch(op.getOperatorTag()) {
case INNERJOIN:
{
JoinUtils.setJoinAlgorithmAndExchangeAlgo((InnerJoinOperator) op, context);
break;
}
case LEFTOUTERJOIN:
{
JoinUtils.setJoinAlgorithmAndExchangeAlgo((LeftOuterJoinOperator) op, context);
break;
}
case UNNEST_MAP:
case LEFT_OUTER_UNNEST_MAP:
{
ILogicalExpression unnestExpr = null;
unnestExpr = ((AbstractUnnestMapOperator) op).getExpressionRef().getValue();
// Unnest expressions that are not function calls are left without a physical operator here.
if (unnestExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) unnestExpr;
FunctionIdentifier fid = f.getFunctionIdentifier();
// The only function accepted here is INDEX_SEARCH.
if (!fid.equals(BuiltinFunctions.INDEX_SEARCH)) {
throw new IllegalStateException();
}
// The function arguments carry the index name, index type, dataverse/dataset names, etc.
AccessMethodJobGenParams jobGenParams = new AccessMethodJobGenParams();
jobGenParams.readFromFuncArgs(f.getArguments());
MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
DataSourceId dataSourceId = new DataSourceId(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
Dataset dataset = mp.findDataset(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
IDataSourceIndex<String, DataSourceId> dsi = mp.findDataSourceIndex(jobGenParams.getIndexName(), dataSourceId);
// NOTE(review): dataset is dereferenced without a null check, and before the dsi null check
// below — confirm findDataset cannot return null on this path.
INodeDomain storageDomain = mp.findNodeDomain(dataset.getNodeGroupName());
if (dsi == null) {
throw new AlgebricksException("Could not find index " + jobGenParams.getIndexName() + " for dataset " + dataSourceId);
}
IndexType indexType = jobGenParams.getIndexType();
boolean requiresBroadcast = jobGenParams.getRequiresBroadcast();
switch(indexType) {
case BTREE:
{
// B-tree search needs the extra B-tree parameters, re-read from the same function arguments.
BTreeJobGenParams btreeJobGenParams = new BTreeJobGenParams();
btreeJobGenParams.readFromFuncArgs(f.getArguments());
op.setPhysicalOperator(new BTreeSearchPOperator(dsi, storageDomain, requiresBroadcast, btreeJobGenParams.isPrimaryIndex(), btreeJobGenParams.isEqCondition(), btreeJobGenParams.getLowKeyVarList(), btreeJobGenParams.getHighKeyVarList()));
break;
}
case RTREE:
{
op.setPhysicalOperator(new RTreeSearchPOperator(dsi, storageDomain, requiresBroadcast));
break;
}
case SINGLE_PARTITION_WORD_INVIX:
case SINGLE_PARTITION_NGRAM_INVIX:
{
// Last constructor argument is false for the single-partition inverted index types.
op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, false));
break;
}
case LENGTH_PARTITIONED_WORD_INVIX:
case LENGTH_PARTITIONED_NGRAM_INVIX:
{
// Last constructor argument is true for the length-partitioned inverted index types.
op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, true));
break;
}
default:
{
throw new NotImplementedException(indexType + " indexes are not implemented.");
}
}
}
break;
}
}
}
// Nested plans are handed to setPhysicalOperators; inputs are processed by direct recursion.
if (op.hasNestedPlans()) {
AbstractOperatorWithNestedPlans nested = (AbstractOperatorWithNestedPlans) op;
for (ILogicalPlan p : nested.getNestedPlans()) {
setPhysicalOperators(p, context);
}
}
for (Mutable<ILogicalOperator> opRef : op.getInputs()) {
computeDefaultPhysicalOp((AbstractLogicalOperator) opRef.getValue(), context);
}
}
use of org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier in project asterixdb by apache.
the class IntroduceLSMComponentFilterRule method analyzeCondition.
/**
 * Builds an analysis context for a condition expression.
 *
 * <p>Unless the top-level function is {@code OR}, the condition itself and each of its direct
 * arguments that is a function call are passed to {@code analyzeFunctionExpr}. For an {@code OR}
 * condition nothing is analyzed and the returned context is left as freshly constructed.
 *
 * @param cond the condition; it is cast to {@link AbstractFunctionCallExpression} without a
 *        check, so callers must pass a function-call expression
 * @param context the optimization context forwarded to {@code analyzeFunctionExpr}
 * @param typeEnvironment the type environment forwarded to {@code analyzeFunctionExpr}
 * @return the analysis context populated by {@code analyzeFunctionExpr}
 * @throws AlgebricksException propagated from {@code analyzeFunctionExpr}
 */
private AccessMethodAnalysisContext analyzeCondition(ILogicalExpression cond, IOptimizationContext context, IVariableTypeEnvironment typeEnvironment) throws AlgebricksException {
    AccessMethodAnalysisContext analysisCtx = new AccessMethodAnalysisContext();
    AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) cond;
    FunctionIdentifier funcIdent = funcExpr.getFunctionIdentifier();
    // Compare by value: an identifier that is equal to OR but is not the same instance must still
    // be recognised as a disjunction, otherwise its branches would be analyzed as if conjunctive.
    if (AlgebricksBuiltinFunctions.OR.equals(funcIdent)) {
        return analysisCtx;
    }
    analyzeFunctionExpr(funcExpr, analysisCtx, context, typeEnvironment);
    for (Mutable<ILogicalExpression> arg : funcExpr.getArguments()) {
        ILogicalExpression argExpr = arg.getValue();
        // Only direct arguments that are themselves function calls are analyzed (one level deep).
        if (argExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
            analyzeFunctionExpr((AbstractFunctionCallExpression) argExpr, analysisCtx, context, typeEnvironment);
        }
    }
    return analysisCtx;
}
use of org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier in project asterixdb by apache.
the class IntroduceLSMComponentFilterRule method getFieldNameFromSubAssignTree.
/**
 * Resolves the field path produced by the {@code varIndex}-th expression of an assign operator,
 * following chained field accesses through assign operators among the operator's inputs.
 *
 * <p>Only {@code FIELD_ACCESS_BY_NAME} and {@code FIELD_ACCESS_BY_INDEX} expressions are
 * resolved. The first variable used by the expression is looked up in each input that is an
 * assign operator; when found, that input is resolved recursively and its record type and path
 * become the prefix for this level.
 *
 * @param optFuncExpr passed through unchanged to the recursive calls
 * @param op the operator expected to be an assign
 * @param varIndex index of the expression within the assign operator
 * @param recType record type against which a field index is resolved at this level
 * @return the record type reached and the accumulated field path, or {@code null} when
 *         {@code op} is not an assign, the expression is not a supported field access, it uses
 *         no variable, or the field name/index argument is not available as a constant
 */
private Pair<ARecordType, List<String>> getFieldNameFromSubAssignTree(IOptimizableFuncExpr optFuncExpr, AbstractLogicalOperator op, int varIndex, ARecordType recType) {
    if (op.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
        return null;
    }
    AssignOperator assignOp = (AssignOperator) op;
    AbstractLogicalExpression expr = (AbstractLogicalExpression) assignOp.getExpressions().get(varIndex).getValue();
    if (expr == null || expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
        return null;
    }
    AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
    FunctionIdentifier funcIdent = funcExpr.getFunctionIdentifier();
    // Compare identifiers by value rather than by reference, as the rest of the optimizer code does.
    boolean accessByName = BuiltinFunctions.FIELD_ACCESS_BY_NAME.equals(funcIdent);
    boolean accessByIndex = BuiltinFunctions.FIELD_ACCESS_BY_INDEX.equals(funcIdent);
    if (!accessByName && !accessByIndex) {
        // Any other function cannot be mapped to a field path.
        return null;
    }

    List<LogicalVariable> usedVars = new ArrayList<>();
    expr.getUsedVariables(usedVars);
    if (usedVars.isEmpty()) {
        // Nothing to trace the access back to (e.g. an access over a variable-free expression).
        return null;
    }
    LogicalVariable usedVar = usedVars.get(0);

    ARecordType currentType = recType;
    List<String> fieldPath = new ArrayList<>();
    // Find the input assign that produces the accessed variable and resolve its path first.
    for (int i = 0; i < op.getInputs().size(); i++) {
        AbstractLogicalOperator inputOp = (AbstractLogicalOperator) op.getInputs().get(i).getValue();
        if (inputOp.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
            continue;
        }
        int nestedAssignVar = ((AssignOperator) inputOp).getVariables().indexOf(usedVar);
        if (nestedAssignVar == -1) {
            continue;
        }
        Pair<ARecordType, List<String>> lowerInfo = getFieldNameFromSubAssignTree(optFuncExpr, inputOp, nestedAssignVar, currentType);
        if (lowerInfo != null) {
            currentType = lowerInfo.first;
            fieldPath = lowerInfo.second;
        }
    }

    if (accessByName) {
        String fieldName = ConstantExpressionUtil.getStringArgument(funcExpr, 1);
        if (fieldName == null) {
            return null;
        }
        fieldPath.add(fieldName);
        return new Pair<>(currentType, fieldPath);
    }

    // Access by index: translate the index into a name using the current record type.
    Integer fieldIndex = ConstantExpressionUtil.getIntArgument(funcExpr, 1);
    if (fieldIndex == null) {
        return null;
    }
    fieldPath.add(currentType.getFieldNames()[fieldIndex]);
    IAType subType = currentType.getFieldTypes()[fieldIndex];
    if (subType.getTypeTag() == ATypeTag.OBJECT) {
        // Descend into the nested record so an enclosing access resolves against it.
        currentType = (ARecordType) subType;
    }
    return new Pair<>(currentType, fieldPath);
}
use of org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier in project asterixdb by apache.
the class IntroduceLSMComponentFilterRule method findMacthedExprFieldName.
/**
 * Walks down the first-input chain below {@code op} looking for the operator that produces one
 * of the variables of {@code optFuncExpr}, and records the corresponding field name on it.
 *
 * <p>The first ASSIGN, DATASOURCESCAN or UNNEST_MAP operator that produces a matching variable
 * decides the outcome:
 * <ul>
 * <li>ASSIGN: the field path comes from {@code getFieldNameFromSubAssignTree};</li>
 * <li>DATASOURCESCAN: the field is the primary key at the variable's position;</li>
 * <li>UNNEST_MAP: the field is a key of the index named in the INDEX_SEARCH arguments, or a
 * primary key when the variable's position is past the secondary keys.</li>
 * </ul>
 *
 * @param optFuncExpr function expression whose variable is matched and whose field name is set
 * @param op operator below which the search starts (its first input)
 * @param dataset dataset supplying primary keys and the meta item type names
 * @param recType record type of the dataset
 * @param datasetIndexes indexes searched by name for the unnest-map case
 * @param context optimization context giving access to the metadata provider
 * @return {@code true} if a field name was set; {@code false} if no producing operator was found
 *         or the field name could not be determined
 * @throws AlgebricksException propagated from the metadata lookup
 * @throws IllegalStateException if an unnest-map function call is not INDEX_SEARCH
 */
private boolean findMacthedExprFieldName(IOptimizableFuncExpr optFuncExpr, AbstractLogicalOperator op, Dataset dataset, ARecordType recType, List<Index> datasetIndexes, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator descendantOp = (AbstractLogicalOperator) op.getInputs().get(0).getValue();
    while (descendantOp != null) {
        if (descendantOp.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
            AssignOperator assignOp = (AssignOperator) descendantOp;
            List<LogicalVariable> varList = assignOp.getVariables();
            for (int varIndex = 0; varIndex < varList.size(); varIndex++) {
                LogicalVariable var = varList.get(varIndex);
                int funcVarIndex = optFuncExpr.findLogicalVar(var);
                if (funcVarIndex == -1) {
                    continue;
                }
                // The helper returns null when the path cannot be resolved; do not dereference it blindly.
                Pair<ARecordType, List<String>> fieldInfo = getFieldNameFromSubAssignTree(optFuncExpr, descendantOp, varIndex, recType);
                if (fieldInfo == null || fieldInfo.second == null) {
                    return false;
                }
                optFuncExpr.setFieldName(funcVarIndex, fieldInfo.second);
                return true;
            }
        } else if (descendantOp.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
            DataSourceScanOperator scanOp = (DataSourceScanOperator) descendantOp;
            List<LogicalVariable> varList = scanOp.getVariables();
            for (int varIndex = 0; varIndex < varList.size(); varIndex++) {
                LogicalVariable var = varList.get(varIndex);
                int funcVarIndex = optFuncExpr.findLogicalVar(var);
                if (funcVarIndex == -1) {
                    continue;
                }
                // The variable value is one of the partitioning fields.
                List<String> fieldName = dataset.getPrimaryKeys().get(varIndex);
                if (fieldName == null) {
                    return false;
                }
                optFuncExpr.setFieldName(funcVarIndex, fieldName);
                return true;
            }
        } else if (descendantOp.getOperatorTag() == LogicalOperatorTag.UNNEST_MAP) {
            UnnestMapOperator unnestMapOp = (UnnestMapOperator) descendantOp;
            List<LogicalVariable> varList = unnestMapOp.getVariables();
            for (int varIndex = 0; varIndex < varList.size(); varIndex++) {
                LogicalVariable var = varList.get(varIndex);
                int funcVarIndex = optFuncExpr.findLogicalVar(var);
                if (funcVarIndex == -1) {
                    continue;
                }
                Index index = null;
                ILogicalExpression unnestExpr = unnestMapOp.getExpressionRef().getValue();
                if (unnestExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
                    AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) unnestExpr;
                    FunctionIdentifier fid = f.getFunctionIdentifier();
                    if (!fid.equals(BuiltinFunctions.INDEX_SEARCH)) {
                        throw new IllegalStateException();
                    }
                    AccessMethodJobGenParams jobGenParams = new AccessMethodJobGenParams();
                    jobGenParams.readFromFuncArgs(f.getArguments());
                    String indexName = jobGenParams.getIndexName();
                    for (Index idx : datasetIndexes) {
                        if (idx.getIndexName().equals(indexName)) {
                            index = idx;
                            break;
                        }
                    }
                }
                if (index == null) {
                    // Either the unnest expression is not a function call or the named index is not
                    // among datasetIndexes; without the index the key layout is unknown.
                    return false;
                }
                IAType metaItemType = ((MetadataProvider) context.getMetadataProvider()).findType(dataset.getMetaItemTypeDataverseName(), dataset.getMetaItemTypeName());
                ARecordType metaRecType = (ARecordType) metaItemType;
                int numSecondaryKeys = KeyFieldTypeUtil.getNumSecondaryKeys(index, recType, metaRecType);
                // Positions past the secondary keys are mapped onto the primary keys.
                List<String> fieldName;
                if (varIndex >= numSecondaryKeys) {
                    fieldName = dataset.getPrimaryKeys().get(varIndex - numSecondaryKeys);
                } else {
                    fieldName = index.getKeyFieldNames().get(varIndex);
                }
                if (fieldName == null) {
                    return false;
                }
                optFuncExpr.setFieldName(funcVarIndex, fieldName);
                return true;
            }
        }
        if (descendantOp.getInputs().isEmpty()) {
            break;
        }
        // Only the first input of each operator is followed.
        descendantOp = (AbstractLogicalOperator) descendantOp.getInputs().get(0).getValue();
    }
    return false;
}
use of org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier in project asterixdb by apache.
the class IntroduceLSMComponentFilterRule method getDataset.
/**
 * Finds the dataset read beneath {@code op} by following the first input of each operator.
 *
 * <p>The walk stops at the first data-source scan (which yields its dataset only when the source
 * is an internal dataset) or at the first unnest-map whose expression is a function call (the
 * dataset is then looked up by the dataverse and dataset names taken from the call's arguments).
 *
 * @param op operator below which the search starts (its first input)
 * @param context optimization context giving access to the metadata provider
 * @return the dataset, or {@code null} if the scan's source is not an internal dataset or the
 *         chain ends without a scan or a function-call unnest-map
 * @throws AlgebricksException if an unnest-map function call is neither EXTERNAL_LOOKUP nor
 *         INDEX_SEARCH, or if the metadata lookup fails
 */
private Dataset getDataset(AbstractLogicalOperator op, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator current = (AbstractLogicalOperator) op.getInputs().get(0).getValue();
    while (current != null) {
        switch (current.getOperatorTag()) {
            case DATASOURCESCAN: {
                DataSource source = (DataSource) ((DataSourceScanOperator) current).getDataSource();
                // Only internal datasets are of interest; any other source type ends the search.
                return source.getDatasourceType() == DataSource.Type.INTERNAL_DATASET
                        ? ((DatasetDataSource) source).getDataset()
                        : null;
            }
            case UNNEST_MAP: {
                ILogicalExpression unnestExpr = ((UnnestMapOperator) current).getExpressionRef().getValue();
                if (unnestExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
                    AbstractFunctionCallExpression call = (AbstractFunctionCallExpression) unnestExpr;
                    FunctionIdentifier fid = call.getFunctionIdentifier();
                    String dataverseName;
                    String datasetName;
                    if (BuiltinFunctions.EXTERNAL_LOOKUP.equals(fid)) {
                        // External lookup: names are the first two constant arguments.
                        dataverseName = AccessMethodUtils.getStringConstant(call.getArguments().get(0));
                        datasetName = AccessMethodUtils.getStringConstant(call.getArguments().get(1));
                    } else if (fid.equals(BuiltinFunctions.INDEX_SEARCH)) {
                        // Index search: names are decoded from the job-gen parameters.
                        AccessMethodJobGenParams params = new AccessMethodJobGenParams();
                        params.readFromFuncArgs(call.getArguments());
                        dataverseName = params.dataverseName;
                        datasetName = params.datasetName;
                    } else {
                        throw new AlgebricksException("Unexpected function for Unnest Map: " + fid);
                    }
                    MetadataProvider metadataProvider = (MetadataProvider) context.getMetadataProvider();
                    return metadataProvider.findDataset(dataverseName, datasetName);
                }
                // Not a function call: keep descending.
                break;
            }
            default:
                break;
        }
        if (current.getInputs().isEmpty()) {
            return null;
        }
        current = (AbstractLogicalOperator) current.getInputs().get(0).getValue();
    }
    return null;
}
Aggregations