Search in sources :

Example 6 with DatasetDataSource

use of org.apache.asterix.metadata.declared.DatasetDataSource in project asterixdb by apache.

the class LangExpressionToPlanTranslator method translateLoad.

@Override
public ILogicalPlan translateLoad(ICompiledDmlStatement stmt) throws AlgebricksException {
    CompiledLoadFromFileStatement clffs = (CompiledLoadFromFileStatement) stmt;
    Dataset dataset = metadataProvider.findDataset(clffs.getDataverseName(), clffs.getDatasetName());
    if (dataset == null) {
        // This would never happen since we check for this in AqlTranslator
        throw new AlgebricksException("Unable to load dataset " + clffs.getDatasetName() + " since it does not exist");
    }
    IAType itemType = metadataProvider.findType(dataset.getItemTypeDataverseName(), dataset.getItemTypeName());
    IAType metaItemType = metadataProvider.findType(dataset.getMetaItemTypeDataverseName(), dataset.getMetaItemTypeName());
    DatasetDataSource targetDatasource = validateDatasetInfo(metadataProvider, stmt.getDataverseName(), stmt.getDatasetName());
    List<List<String>> partitionKeys = targetDatasource.getDataset().getPrimaryKeys();
    if (dataset.hasMetaPart()) {
        throw new AlgebricksException(dataset.getDatasetName() + ": load dataset is not supported on Datasets with Meta records");
    }
    LoadableDataSource lds;
    try {
        lds = new LoadableDataSource(dataset, itemType, metaItemType, clffs.getAdapter(), clffs.getProperties());
    } catch (IOException e) {
        throw new AlgebricksException(e);
    }
    // etsOp is a dummy input operator used to keep the compiler happy. it
    // could be removed but would result in
    // the need to fix many rewrite rules that assume that datasourcescan
    // operators always have input.
    ILogicalOperator etsOp = new EmptyTupleSourceOperator();
    // Add a logical variable for the record.
    List<LogicalVariable> payloadVars = new ArrayList<>();
    payloadVars.add(context.newVar());
    // Create a scan operator and make the empty tuple source its input
    DataSourceScanOperator dssOp = new DataSourceScanOperator(payloadVars, lds);
    dssOp.getInputs().add(new MutableObject<>(etsOp));
    ILogicalExpression payloadExpr = new VariableReferenceExpression(payloadVars.get(0));
    Mutable<ILogicalExpression> payloadRef = new MutableObject<>(payloadExpr);
    // Creating the assign to extract the PK out of the record
    ArrayList<LogicalVariable> pkVars = new ArrayList<>();
    ArrayList<Mutable<ILogicalExpression>> pkExprs = new ArrayList<>();
    List<Mutable<ILogicalExpression>> varRefsForLoading = new ArrayList<>();
    LogicalVariable payloadVar = payloadVars.get(0);
    for (List<String> keyFieldName : partitionKeys) {
        PlanTranslationUtil.prepareVarAndExpression(keyFieldName, payloadVar, pkVars, pkExprs, varRefsForLoading, context);
    }
    AssignOperator assign = new AssignOperator(pkVars, pkExprs);
    assign.getInputs().add(new MutableObject<>(dssOp));
    // If the input is pre-sorted, we set the ordering property explicitly in the assign
    if (clffs.alreadySorted()) {
        List<OrderColumn> orderColumns = new ArrayList<>();
        for (int i = 0; i < pkVars.size(); ++i) {
            orderColumns.add(new OrderColumn(pkVars.get(i), OrderKind.ASC));
        }
        assign.setExplicitOrderingProperty(new LocalOrderProperty(orderColumns));
    }
    List<String> additionalFilteringField = DatasetUtil.getFilterField(targetDatasource.getDataset());
    List<LogicalVariable> additionalFilteringVars;
    List<Mutable<ILogicalExpression>> additionalFilteringAssignExpressions;
    List<Mutable<ILogicalExpression>> additionalFilteringExpressions = null;
    AssignOperator additionalFilteringAssign = null;
    if (additionalFilteringField != null) {
        additionalFilteringVars = new ArrayList<>();
        additionalFilteringAssignExpressions = new ArrayList<>();
        additionalFilteringExpressions = new ArrayList<>();
        PlanTranslationUtil.prepareVarAndExpression(additionalFilteringField, payloadVar, additionalFilteringVars, additionalFilteringAssignExpressions, additionalFilteringExpressions, context);
        additionalFilteringAssign = new AssignOperator(additionalFilteringVars, additionalFilteringAssignExpressions);
    }
    InsertDeleteUpsertOperator insertOp = new InsertDeleteUpsertOperator(targetDatasource, payloadRef, varRefsForLoading, InsertDeleteUpsertOperator.Kind.INSERT, true);
    insertOp.setAdditionalFilteringExpressions(additionalFilteringExpressions);
    if (additionalFilteringAssign != null) {
        additionalFilteringAssign.getInputs().add(new MutableObject<>(assign));
        insertOp.getInputs().add(new MutableObject<>(additionalFilteringAssign));
    } else {
        insertOp.getInputs().add(new MutableObject<>(assign));
    }
    ILogicalOperator leafOperator = new SinkOperator();
    leafOperator.getInputs().add(new MutableObject<>(insertOp));
    return new ALogicalPlanImpl(new MutableObject<>(leafOperator));
}
Also used : CompiledLoadFromFileStatement(org.apache.asterix.translator.CompiledStatements.CompiledLoadFromFileStatement) ArrayList(java.util.ArrayList) DatasetDataSource(org.apache.asterix.metadata.declared.DatasetDataSource) AString(org.apache.asterix.om.base.AString) DataSourceScanOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator) ALogicalPlanImpl(org.apache.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl) ArrayList(java.util.ArrayList) List(java.util.List) LoadableDataSource(org.apache.asterix.metadata.declared.LoadableDataSource) EmptyTupleSourceOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator) MutableObject(org.apache.commons.lang3.mutable.MutableObject) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) SinkOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.SinkOperator) Dataset(org.apache.asterix.metadata.entities.Dataset) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) OrderColumn(org.apache.hyracks.algebricks.core.algebra.properties.OrderColumn) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) IOException(java.io.IOException) AssignOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator) Mutable(org.apache.commons.lang3.mutable.Mutable) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) InsertDeleteUpsertOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InsertDeleteUpsertOperator) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) LocalOrderProperty(org.apache.hyracks.algebricks.core.algebra.properties.LocalOrderProperty) IAType(org.apache.asterix.om.types.IAType)

Example 7 with DatasetDataSource

use of org.apache.asterix.metadata.declared.DatasetDataSource in project asterixdb by apache.

the class IntroduceLSMComponentFilterRule method getDataset.

private Dataset getDataset(AbstractLogicalOperator op, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator descendantOp = (AbstractLogicalOperator) op.getInputs().get(0).getValue();
    while (descendantOp != null) {
        if (descendantOp.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
            DataSourceScanOperator dataSourceScanOp = (DataSourceScanOperator) descendantOp;
            DataSource ds = (DataSource) dataSourceScanOp.getDataSource();
            if (ds.getDatasourceType() != DataSource.Type.INTERNAL_DATASET) {
                return null;
            }
            return ((DatasetDataSource) ds).getDataset();
        } else if (descendantOp.getOperatorTag() == LogicalOperatorTag.UNNEST_MAP) {
            UnnestMapOperator unnestMapOp = (UnnestMapOperator) descendantOp;
            ILogicalExpression unnestExpr = unnestMapOp.getExpressionRef().getValue();
            if (unnestExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
                AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) unnestExpr;
                FunctionIdentifier fid = f.getFunctionIdentifier();
                String dataverseName;
                String datasetName;
                if (BuiltinFunctions.EXTERNAL_LOOKUP.equals(fid)) {
                    dataverseName = AccessMethodUtils.getStringConstant(f.getArguments().get(0));
                    datasetName = AccessMethodUtils.getStringConstant(f.getArguments().get(1));
                } else if (fid.equals(BuiltinFunctions.INDEX_SEARCH)) {
                    AccessMethodJobGenParams jobGenParams = new AccessMethodJobGenParams();
                    jobGenParams.readFromFuncArgs(f.getArguments());
                    dataverseName = jobGenParams.dataverseName;
                    datasetName = jobGenParams.datasetName;
                } else {
                    throw new AlgebricksException("Unexpected function for Unnest Map: " + fid);
                }
                return ((MetadataProvider) context.getMetadataProvider()).findDataset(dataverseName, datasetName);
            }
        }
        if (descendantOp.getInputs().isEmpty()) {
            break;
        }
        descendantOp = (AbstractLogicalOperator) descendantOp.getInputs().get(0).getValue();
    }
    return null;
}
Also used : FunctionIdentifier(org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) DataSourceScanOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator) UnnestMapOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) DatasetDataSource(org.apache.asterix.metadata.declared.DatasetDataSource) DataSource(org.apache.asterix.metadata.declared.DataSource) DatasetDataSource(org.apache.asterix.metadata.declared.DatasetDataSource)

Example 8 with DatasetDataSource

use of org.apache.asterix.metadata.declared.DatasetDataSource in project asterixdb by apache.

the class IntroduceMaterializationForInsertWithSelfScanRule method rewritePost.

@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
    if (op.getOperatorTag() != LogicalOperatorTag.INSERT_DELETE_UPSERT) {
        return false;
    }
    InsertDeleteUpsertOperator insertOp = (InsertDeleteUpsertOperator) op;
    boolean sameDataset = checkIfInsertAndScanDatasetsSame(op, ((DatasetDataSource) insertOp.getDataSource()).getDataset().getDatasetName());
    if (sameDataset) {
        MaterializeOperator materializeOperator = new MaterializeOperator();
        MaterializePOperator materializePOperator = new MaterializePOperator(true);
        materializeOperator.setPhysicalOperator(materializePOperator);
        materializeOperator.getInputs().add(new MutableObject<ILogicalOperator>(insertOp.getInputs().get(0).getValue()));
        context.computeAndSetTypeEnvironmentForOperator(materializeOperator);
        insertOp.getInputs().clear();
        insertOp.getInputs().add(new MutableObject<ILogicalOperator>(materializeOperator));
        context.computeAndSetTypeEnvironmentForOperator(insertOp);
        return true;
    } else {
        return false;
    }
}
Also used : MaterializeOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.MaterializeOperator) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) InsertDeleteUpsertOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InsertDeleteUpsertOperator) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) DatasetDataSource(org.apache.asterix.metadata.declared.DatasetDataSource) MaterializePOperator(org.apache.hyracks.algebricks.core.algebra.operators.physical.MaterializePOperator)

Example 9 with DatasetDataSource

use of org.apache.asterix.metadata.declared.DatasetDataSource in project asterixdb by apache.

the class IntroduceAutogenerateIDRule method rewritePost.

@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    // match: commit OR distribute-result OR SINK - ... followed by:
    // [insert to internal dataset with autogenerated id] - assign - project
    // produce: insert - assign - assign* - project
    // **
    // OR [insert to internal dataset with autogenerated id] - assign - [datasource scan]
    // produce insert - assign - assign* - datasource scan
    AbstractLogicalOperator currentOp = (AbstractLogicalOperator) opRef.getValue();
    if (currentOp.getOperatorTag() == LogicalOperatorTag.DELEGATE_OPERATOR) {
        DelegateOperator dOp = (DelegateOperator) currentOp;
        if (!(dOp.getDelegate() instanceof CommitOperator)) {
            return false;
        } else if (!((CommitOperator) dOp.getDelegate()).isSink()) {
            return false;
        }
    } else if (currentOp.getOperatorTag() != LogicalOperatorTag.DISTRIBUTE_RESULT && currentOp.getOperatorTag() != LogicalOperatorTag.SINK) {
        return false;
    }
    ArrayDeque<AbstractLogicalOperator> opStack = new ArrayDeque<>();
    opStack.push(currentOp);
    while (currentOp.getInputs().size() == 1) {
        currentOp = (AbstractLogicalOperator) currentOp.getInputs().get(0).getValue();
        if (currentOp.getOperatorTag() == LogicalOperatorTag.INSERT_DELETE_UPSERT) {
            break;
        }
        opStack.push(currentOp);
    }
    if (currentOp.getOperatorTag() != LogicalOperatorTag.INSERT_DELETE_UPSERT) {
        return false;
    }
    InsertDeleteUpsertOperator insertOp = (InsertDeleteUpsertOperator) currentOp;
    if (insertOp.getOperation() != Kind.INSERT && insertOp.getOperation() != Kind.UPSERT) {
        return false;
    }
    DatasetDataSource dds = (DatasetDataSource) insertOp.getDataSource();
    boolean autogenerated = ((InternalDatasetDetails) dds.getDataset().getDatasetDetails()).isAutogenerated();
    if (!autogenerated) {
        return false;
    }
    if (((DataSource) insertOp.getDataSource()).getDatasourceType() != Type.INTERNAL_DATASET) {
        return false;
    }
    AbstractLogicalOperator parentOp = (AbstractLogicalOperator) currentOp.getInputs().get(0).getValue();
    if (parentOp.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
        return false;
    }
    AssignOperator assignOp = (AssignOperator) parentOp;
    LogicalVariable inputRecord;
    //TODO: bug here. will not work for internal datasets with filters since the pattern becomes 
    //[project-assign-assign-insert]
    AbstractLogicalOperator grandparentOp = (AbstractLogicalOperator) parentOp.getInputs().get(0).getValue();
    if (grandparentOp.getOperatorTag() == LogicalOperatorTag.PROJECT) {
        ProjectOperator projectOp = (ProjectOperator) grandparentOp;
        inputRecord = projectOp.getVariables().get(0);
    } else if (grandparentOp.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
        DataSourceScanOperator dssOp = (DataSourceScanOperator) grandparentOp;
        inputRecord = dssOp.getVariables().get(0);
    } else {
        return false;
    }
    List<String> pkFieldName = ((InternalDatasetDetails) dds.getDataset().getDatasetDetails()).getPrimaryKey().get(0);
    ILogicalExpression rec0 = new VariableReferenceExpression(inputRecord);
    ILogicalExpression rec1 = createPrimaryKeyRecordExpression(pkFieldName);
    ILogicalExpression mergedRec = createRecordMergeFunction(rec0, rec1);
    ILogicalExpression nonNullMergedRec = createNotNullFunction(mergedRec);
    LogicalVariable v = context.newVar();
    AssignOperator newAssign = new AssignOperator(v, new MutableObject<ILogicalExpression>(nonNullMergedRec));
    newAssign.getInputs().add(new MutableObject<ILogicalOperator>(grandparentOp));
    assignOp.getInputs().set(0, new MutableObject<ILogicalOperator>(newAssign));
    VariableUtilities.substituteVariables(assignOp, inputRecord, v, context);
    VariableUtilities.substituteVariables(insertOp, inputRecord, v, context);
    context.computeAndSetTypeEnvironmentForOperator(newAssign);
    context.computeAndSetTypeEnvironmentForOperator(assignOp);
    context.computeAndSetTypeEnvironmentForOperator(insertOp);
    ;
    for (AbstractLogicalOperator op : opStack) {
        VariableUtilities.substituteVariables(op, inputRecord, v, context);
        context.computeAndSetTypeEnvironmentForOperator(op);
    }
    return true;
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) ProjectOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator) InternalDatasetDetails(org.apache.asterix.metadata.entities.InternalDatasetDetails) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) DatasetDataSource(org.apache.asterix.metadata.declared.DatasetDataSource) AString(org.apache.asterix.om.base.AString) AssignOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator) ArrayDeque(java.util.ArrayDeque) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) InsertDeleteUpsertOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InsertDeleteUpsertOperator) DataSourceScanOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator) DelegateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.DelegateOperator) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) CommitOperator(org.apache.asterix.algebra.operators.CommitOperator)

Example 10 with DatasetDataSource

use of org.apache.asterix.metadata.declared.DatasetDataSource in project asterixdb by apache.

the class RemoveUnusedOneToOneEquiJoinRule method removeJoinFromInputBranch.

private int removeJoinFromInputBranch(Mutable<ILogicalOperator> opRef) throws AlgebricksException {
    AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
    if (op.getOperatorTag() != LogicalOperatorTag.INNERJOIN) {
        return -1;
    }
    AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) op;
    // Make sure the join is an equi-join.
    if (!isEquiJoin(joinOp.getCondition())) {
        return -1;
    }
    int unusedJoinBranchIndex = -1;
    for (int i = 0; i < joinOp.getInputs().size(); i++) {
        liveVars.clear();
        VariableUtilities.getLiveVariables(joinOp.getInputs().get(i).getValue(), liveVars);
        if (liveVars.isEmpty()) {
            // The branch does not produce any variable, i.e., it only contains an empty tuple source.
            return i;
        }
        liveVars.retainAll(parentsUsedVars);
        if (liveVars.isEmpty()) {
            // None of the live variables from this branch are used by its parents.
            unusedJoinBranchIndex = i;
            break;
        }
    }
    if (unusedJoinBranchIndex < 0) {
        // The variables from both branches are used in the upstream plan. We cannot remove this join.
        return -1;
    }
    // Check whether one of the join branches is unused.
    usedVars.clear();
    VariableUtilities.getUsedVariables(joinOp, usedVars);
    // Check whether all used variables originate from primary keys of exactly the same dataset.
    // Collect a list of datascans whose primary key variables are used in the join condition.
    gatherProducingDataScans(opRef, usedVars, dataScans);
    if (dataScans.size() < 2) {
        // Either branch does not use its primary key in the join condition.
        return -1;
    }
    // only used primary key variables of those datascans.
    for (int i = 0; i < dataScans.size(); i++) {
        if (i > 0) {
            DatasetDataSource prevAqlDataSource = (DatasetDataSource) dataScans.get(i - 1).getDataSource();
            DatasetDataSource currAqlDataSource = (DatasetDataSource) dataScans.get(i).getDataSource();
            if (!prevAqlDataSource.getDataset().equals(currAqlDataSource.getDataset())) {
                return -1;
            }
        }
        // Remove from the used variables all the primary key vars of this dataset.
        fillPKVars(dataScans.get(i), pkVars);
        usedVars.removeAll(pkVars);
    }
    if (!usedVars.isEmpty()) {
        // keys from datasource scans of the same dataset.
        return -1;
    }
    // We expect the post-plan will NOT prune the join part derived from A.
    if (unusedJoinBranchIndex >= 0 && isSelectionAboveDataScan(opRef.getValue().getInputs().get(unusedJoinBranchIndex))) {
        unusedJoinBranchIndex = -1;
    }
    return unusedJoinBranchIndex;
}
Also used : AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) DatasetDataSource(org.apache.asterix.metadata.declared.DatasetDataSource) AbstractBinaryJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator)

Aggregations

DatasetDataSource (org.apache.asterix.metadata.declared.DatasetDataSource)12 ILogicalExpression (org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression)7 AbstractLogicalOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator)7 LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable)5 VariableReferenceExpression (org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression)5 DataSourceScanOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator)5 ArrayList (java.util.ArrayList)4 DataSource (org.apache.asterix.metadata.declared.DataSource)4 Mutable (org.apache.commons.lang3.mutable.Mutable)4 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)4 ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator)4 AssignOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator)4 InsertDeleteUpsertOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.InsertDeleteUpsertOperator)4 List (java.util.List)3 Dataset (org.apache.asterix.metadata.entities.Dataset)3 InternalDatasetDetails (org.apache.asterix.metadata.entities.InternalDatasetDetails)3 AString (org.apache.asterix.om.base.AString)3 IAType (org.apache.asterix.om.types.IAType)3 MutableObject (org.apache.commons.lang3.mutable.MutableObject)3 AbstractFunctionCallExpression (org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression)3