Use of org.apache.asterix.metadata.declared.DatasetDataSource in project asterixdb by Apache.
The class LangExpressionToPlanTranslator, method translateLoad:
@Override
public ILogicalPlan translateLoad(ICompiledDmlStatement stmt) throws AlgebricksException {
    CompiledLoadFromFileStatement clffs = (CompiledLoadFromFileStatement) stmt;
    Dataset dataset = metadataProvider.findDataset(clffs.getDataverseName(), clffs.getDatasetName());
    if (dataset == null) {
        // This should never happen, since the existence check is already done in AqlTranslator.
        throw new AlgebricksException(
                "Unable to load dataset " + clffs.getDatasetName() + " since it does not exist");
    }
    IAType itemType = metadataProvider.findType(dataset.getItemTypeDataverseName(), dataset.getItemTypeName());
    IAType metaItemType =
            metadataProvider.findType(dataset.getMetaItemTypeDataverseName(), dataset.getMetaItemTypeName());
    DatasetDataSource targetDatasource =
            validateDatasetInfo(metadataProvider, stmt.getDataverseName(), stmt.getDatasetName());
    List<List<String>> partitionKeys = targetDatasource.getDataset().getPrimaryKeys();
    if (dataset.hasMetaPart()) {
        throw new AlgebricksException(
                dataset.getDatasetName() + ": load dataset is not supported on Datasets with Meta records");
    }
    LoadableDataSource lds;
    try {
        lds = new LoadableDataSource(dataset, itemType, metaItemType, clffs.getAdapter(), clffs.getProperties());
    } catch (IOException e) {
        throw new AlgebricksException(e);
    }
    // etsOp is a dummy input operator used to keep the compiler happy. It could be
    // removed, but that would mean fixing many rewrite rules that assume a
    // datasource-scan operator always has an input.
    ILogicalOperator etsOp = new EmptyTupleSourceOperator();
    // Add a logical variable for the record.
    List<LogicalVariable> payloadVars = new ArrayList<>();
    payloadVars.add(context.newVar());
    // Create a scan operator and make the empty tuple source its input.
    DataSourceScanOperator dssOp = new DataSourceScanOperator(payloadVars, lds);
    dssOp.getInputs().add(new MutableObject<>(etsOp));
    ILogicalExpression payloadExpr = new VariableReferenceExpression(payloadVars.get(0));
    Mutable<ILogicalExpression> payloadRef = new MutableObject<>(payloadExpr);
    // Create the assign that extracts the primary key out of the record.
    ArrayList<LogicalVariable> pkVars = new ArrayList<>();
    ArrayList<Mutable<ILogicalExpression>> pkExprs = new ArrayList<>();
    List<Mutable<ILogicalExpression>> varRefsForLoading = new ArrayList<>();
    LogicalVariable payloadVar = payloadVars.get(0);
    for (List<String> keyFieldName : partitionKeys) {
        PlanTranslationUtil.prepareVarAndExpression(keyFieldName, payloadVar, pkVars, pkExprs,
                varRefsForLoading, context);
    }
    AssignOperator assign = new AssignOperator(pkVars, pkExprs);
    assign.getInputs().add(new MutableObject<>(dssOp));
    // If the input is pre-sorted, set the ordering property explicitly on the assign.
    if (clffs.alreadySorted()) {
        List<OrderColumn> orderColumns = new ArrayList<>();
        for (int i = 0; i < pkVars.size(); ++i) {
            orderColumns.add(new OrderColumn(pkVars.get(i), OrderKind.ASC));
        }
        assign.setExplicitOrderingProperty(new LocalOrderProperty(orderColumns));
    }
    List<String> additionalFilteringField = DatasetUtil.getFilterField(targetDatasource.getDataset());
    List<LogicalVariable> additionalFilteringVars;
    List<Mutable<ILogicalExpression>> additionalFilteringAssignExpressions;
    List<Mutable<ILogicalExpression>> additionalFilteringExpressions = null;
    AssignOperator additionalFilteringAssign = null;
    if (additionalFilteringField != null) {
        additionalFilteringVars = new ArrayList<>();
        additionalFilteringAssignExpressions = new ArrayList<>();
        additionalFilteringExpressions = new ArrayList<>();
        PlanTranslationUtil.prepareVarAndExpression(additionalFilteringField, payloadVar, additionalFilteringVars,
                additionalFilteringAssignExpressions, additionalFilteringExpressions, context);
        additionalFilteringAssign =
                new AssignOperator(additionalFilteringVars, additionalFilteringAssignExpressions);
    }
    InsertDeleteUpsertOperator insertOp = new InsertDeleteUpsertOperator(targetDatasource, payloadRef,
            varRefsForLoading, InsertDeleteUpsertOperator.Kind.INSERT, true);
    insertOp.setAdditionalFilteringExpressions(additionalFilteringExpressions);
    if (additionalFilteringAssign != null) {
        additionalFilteringAssign.getInputs().add(new MutableObject<>(assign));
        insertOp.getInputs().add(new MutableObject<>(additionalFilteringAssign));
    } else {
        insertOp.getInputs().add(new MutableObject<>(assign));
    }
    ILogicalOperator leafOperator = new SinkOperator();
    leafOperator.getInputs().add(new MutableObject<>(insertOp));
    return new ALogicalPlanImpl(new MutableObject<>(leafOperator));
}
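The method wires the operators bottom-up through Mutable references, producing the chain SINK, INSERT_DELETE_UPSERT, ASSIGN (twice when the dataset has a filter field), DATASOURCESCAN, EMPTYTUPLESOURCE. As a minimal sketch, a hypothetical debugging helper (not part of the translator, assuming only the Algebricks types already used above) that prints that chain:

// Hypothetical helper: walk the single-input chain of a plan root and print
// each operator's tag, to eyeball the plan built by translateLoad.
static void printOperatorChain(ILogicalPlan plan) {
    ILogicalOperator op = plan.getRoots().get(0).getValue();
    while (op != null) {
        System.out.println(op.getOperatorTag());
        op = op.getInputs().isEmpty() ? null : op.getInputs().get(0).getValue();
    }
}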
Use of org.apache.asterix.metadata.declared.DatasetDataSource in project asterixdb by Apache.
The class IntroduceLSMComponentFilterRule, method getDataset:
private Dataset getDataset(AbstractLogicalOperator op, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator descendantOp = (AbstractLogicalOperator) op.getInputs().get(0).getValue();
    while (descendantOp != null) {
        if (descendantOp.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
            DataSourceScanOperator dataSourceScanOp = (DataSourceScanOperator) descendantOp;
            DataSource ds = (DataSource) dataSourceScanOp.getDataSource();
            if (ds.getDatasourceType() != DataSource.Type.INTERNAL_DATASET) {
                return null;
            }
            return ((DatasetDataSource) ds).getDataset();
        } else if (descendantOp.getOperatorTag() == LogicalOperatorTag.UNNEST_MAP) {
            UnnestMapOperator unnestMapOp = (UnnestMapOperator) descendantOp;
            ILogicalExpression unnestExpr = unnestMapOp.getExpressionRef().getValue();
            if (unnestExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
                AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) unnestExpr;
                FunctionIdentifier fid = f.getFunctionIdentifier();
                String dataverseName;
                String datasetName;
                if (BuiltinFunctions.EXTERNAL_LOOKUP.equals(fid)) {
                    dataverseName = AccessMethodUtils.getStringConstant(f.getArguments().get(0));
                    datasetName = AccessMethodUtils.getStringConstant(f.getArguments().get(1));
                } else if (fid.equals(BuiltinFunctions.INDEX_SEARCH)) {
                    AccessMethodJobGenParams jobGenParams = new AccessMethodJobGenParams();
                    jobGenParams.readFromFuncArgs(f.getArguments());
                    dataverseName = jobGenParams.dataverseName;
                    datasetName = jobGenParams.datasetName;
                } else {
                    throw new AlgebricksException("Unexpected function for Unnest Map: " + fid);
                }
                return ((MetadataProvider) context.getMetadataProvider()).findDataset(dataverseName, datasetName);
            }
        }
        if (descendantOp.getInputs().isEmpty()) {
            break;
        }
        descendantOp = (AbstractLogicalOperator) descendantOp.getInputs().get(0).getValue();
    }
    return null;
}
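The descent pattern here (follow input 0 down the plan until an operator of interest appears) recurs throughout the optimizer rules. A generic form of it, as a sketch assuming only the Algebricks types already used above:

// Sketch of the single-branch descent used by getDataset: return the first
// descendant (following input 0) with the given tag, or null if none exists.
static AbstractLogicalOperator findBelow(AbstractLogicalOperator op, LogicalOperatorTag tag) {
    while (!op.getInputs().isEmpty()) {
        op = (AbstractLogicalOperator) op.getInputs().get(0).getValue();
        if (op.getOperatorTag() == tag) {
            return op;
        }
    }
    return null;
}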
Use of org.apache.asterix.metadata.declared.DatasetDataSource in project asterixdb by Apache.
The class IntroduceMaterializationForInsertWithSelfScanRule, method rewritePost:
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
        throws AlgebricksException {
    AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
    if (op.getOperatorTag() != LogicalOperatorTag.INSERT_DELETE_UPSERT) {
        return false;
    }
    InsertDeleteUpsertOperator insertOp = (InsertDeleteUpsertOperator) op;
    boolean sameDataset = checkIfInsertAndScanDatasetsSame(op,
            ((DatasetDataSource) insertOp.getDataSource()).getDataset().getDatasetName());
    if (sameDataset) {
        MaterializeOperator materializeOperator = new MaterializeOperator();
        MaterializePOperator materializePOperator = new MaterializePOperator(true);
        materializeOperator.setPhysicalOperator(materializePOperator);
        materializeOperator.getInputs()
                .add(new MutableObject<ILogicalOperator>(insertOp.getInputs().get(0).getValue()));
        context.computeAndSetTypeEnvironmentForOperator(materializeOperator);
        insertOp.getInputs().clear();
        insertOp.getInputs().add(new MutableObject<ILogicalOperator>(materializeOperator));
        context.computeAndSetTypeEnvironmentForOperator(insertOp);
        return true;
    } else {
        return false;
    }
}
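The materialization guards against reading and writing the same dataset in one pipeline, e.g. (hypothetically) an INSERT whose source query scans the target dataset; materializing the input decouples the scan from the write. The splice itself is the standard Algebricks rewiring idiom, isolated here as a sketch:

// Sketch: splice newOp between parent and parent's (single) former input by
// rewiring the Mutable input references, the same pattern the rule applies.
static void spliceAbove(AbstractLogicalOperator parent, AbstractLogicalOperator newOp) {
    newOp.getInputs().add(new MutableObject<ILogicalOperator>(parent.getInputs().get(0).getValue()));
    parent.getInputs().clear();
    parent.getInputs().add(new MutableObject<ILogicalOperator>(newOp));
}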
Use of org.apache.asterix.metadata.declared.DatasetDataSource in project asterixdb by Apache.
The class IntroduceAutogenerateIDRule, method rewritePost:
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
        throws AlgebricksException {
    // Match: commit OR distribute-result OR sink - ... followed by either:
    //   [insert to internal dataset with autogenerated id] - assign - project
    //   (produce: insert - assign - assign* - project)
    // or:
    //   [insert to internal dataset with autogenerated id] - assign - [datasource scan]
    //   (produce: insert - assign - assign* - datasource scan)
    AbstractLogicalOperator currentOp = (AbstractLogicalOperator) opRef.getValue();
    if (currentOp.getOperatorTag() == LogicalOperatorTag.DELEGATE_OPERATOR) {
        DelegateOperator dOp = (DelegateOperator) currentOp;
        if (!(dOp.getDelegate() instanceof CommitOperator)) {
            return false;
        } else if (!((CommitOperator) dOp.getDelegate()).isSink()) {
            return false;
        }
    } else if (currentOp.getOperatorTag() != LogicalOperatorTag.DISTRIBUTE_RESULT
            && currentOp.getOperatorTag() != LogicalOperatorTag.SINK) {
        return false;
    }
    ArrayDeque<AbstractLogicalOperator> opStack = new ArrayDeque<>();
    opStack.push(currentOp);
    while (currentOp.getInputs().size() == 1) {
        currentOp = (AbstractLogicalOperator) currentOp.getInputs().get(0).getValue();
        if (currentOp.getOperatorTag() == LogicalOperatorTag.INSERT_DELETE_UPSERT) {
            break;
        }
        opStack.push(currentOp);
    }
    if (currentOp.getOperatorTag() != LogicalOperatorTag.INSERT_DELETE_UPSERT) {
        return false;
    }
    InsertDeleteUpsertOperator insertOp = (InsertDeleteUpsertOperator) currentOp;
    if (insertOp.getOperation() != Kind.INSERT && insertOp.getOperation() != Kind.UPSERT) {
        return false;
    }
    DatasetDataSource dds = (DatasetDataSource) insertOp.getDataSource();
    boolean autogenerated = ((InternalDatasetDetails) dds.getDataset().getDatasetDetails()).isAutogenerated();
    if (!autogenerated) {
        return false;
    }
    if (((DataSource) insertOp.getDataSource()).getDatasourceType() != Type.INTERNAL_DATASET) {
        return false;
    }
    AbstractLogicalOperator parentOp = (AbstractLogicalOperator) currentOp.getInputs().get(0).getValue();
    if (parentOp.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
        return false;
    }
    AssignOperator assignOp = (AssignOperator) parentOp;
    LogicalVariable inputRecord;
    // TODO: bug here. This will not work for internal datasets with filters,
    // since the pattern becomes [project-assign-assign-insert].
    AbstractLogicalOperator grandparentOp = (AbstractLogicalOperator) parentOp.getInputs().get(0).getValue();
    if (grandparentOp.getOperatorTag() == LogicalOperatorTag.PROJECT) {
        ProjectOperator projectOp = (ProjectOperator) grandparentOp;
        inputRecord = projectOp.getVariables().get(0);
    } else if (grandparentOp.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
        DataSourceScanOperator dssOp = (DataSourceScanOperator) grandparentOp;
        inputRecord = dssOp.getVariables().get(0);
    } else {
        return false;
    }
    List<String> pkFieldName =
            ((InternalDatasetDetails) dds.getDataset().getDatasetDetails()).getPrimaryKey().get(0);
    ILogicalExpression rec0 = new VariableReferenceExpression(inputRecord);
    ILogicalExpression rec1 = createPrimaryKeyRecordExpression(pkFieldName);
    ILogicalExpression mergedRec = createRecordMergeFunction(rec0, rec1);
    ILogicalExpression nonNullMergedRec = createNotNullFunction(mergedRec);
    LogicalVariable v = context.newVar();
    AssignOperator newAssign = new AssignOperator(v, new MutableObject<ILogicalExpression>(nonNullMergedRec));
    newAssign.getInputs().add(new MutableObject<ILogicalOperator>(grandparentOp));
    assignOp.getInputs().set(0, new MutableObject<ILogicalOperator>(newAssign));
    VariableUtilities.substituteVariables(assignOp, inputRecord, v, context);
    VariableUtilities.substituteVariables(insertOp, inputRecord, v, context);
    context.computeAndSetTypeEnvironmentForOperator(newAssign);
    context.computeAndSetTypeEnvironmentForOperator(assignOp);
    context.computeAndSetTypeEnvironmentForOperator(insertOp);
    for (AbstractLogicalOperator op : opStack) {
        VariableUtilities.substituteVariables(op, inputRecord, v, context);
        context.computeAndSetTypeEnvironmentForOperator(op);
    }
    return true;
}
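createPrimaryKeyRecordExpression, createRecordMergeFunction, and createNotNullFunction are private helpers of the rule and are not shown here. As an illustration only, a plausible sketch of the merge step, assuming BuiltinFunctions.RECORD_MERGE and the standard function-info lookup, not the rule's actual helper:

// Sketch: wrap two record expressions in a record-merge function call, so the
// record containing the autogenerated PK is merged into the input record.
static ILogicalExpression mergeRecords(ILogicalExpression rec0, ILogicalExpression rec1) {
    AbstractFunctionCallExpression merge =
            new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.RECORD_MERGE));
    merge.getArguments().add(new MutableObject<ILogicalExpression>(rec0));
    merge.getArguments().add(new MutableObject<ILogicalExpression>(rec1));
    return merge;
}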
Use of org.apache.asterix.metadata.declared.DatasetDataSource in project asterixdb by Apache.
The class RemoveUnusedOneToOneEquiJoinRule, method removeJoinFromInputBranch:
private int removeJoinFromInputBranch(Mutable<ILogicalOperator> opRef) throws AlgebricksException {
    AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
    if (op.getOperatorTag() != LogicalOperatorTag.INNERJOIN) {
        return -1;
    }
    AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) op;
    // Make sure the join is an equi-join.
    if (!isEquiJoin(joinOp.getCondition())) {
        return -1;
    }
    // Check whether one of the join branches is unused.
    int unusedJoinBranchIndex = -1;
    for (int i = 0; i < joinOp.getInputs().size(); i++) {
        liveVars.clear();
        VariableUtilities.getLiveVariables(joinOp.getInputs().get(i).getValue(), liveVars);
        if (liveVars.isEmpty()) {
            // The branch does not produce any variable, i.e., it only contains an empty tuple source.
            return i;
        }
        liveVars.retainAll(parentsUsedVars);
        if (liveVars.isEmpty()) {
            // None of the live variables from this branch are used by its parents.
            unusedJoinBranchIndex = i;
            break;
        }
    }
    if (unusedJoinBranchIndex < 0) {
        // The variables from both branches are used in the upstream plan. We cannot remove this join.
        return -1;
    }
    // Collect the variables used by the join itself (mainly the join condition).
    usedVars.clear();
    VariableUtilities.getUsedVariables(joinOp, usedVars);
    // Check whether all used variables originate from primary keys of exactly the same dataset.
    // Collect a list of datascans whose primary key variables are used in the join condition.
    gatherProducingDataScans(opRef, usedVars, dataScans);
    if (dataScans.size() < 2) {
        // Either branch does not use its primary key in the join condition.
        return -1;
    }
    // Check that all datascans scan the same dataset and that the join condition
    // only used primary key variables of those datascans.
    for (int i = 0; i < dataScans.size(); i++) {
        if (i > 0) {
            DatasetDataSource prevAqlDataSource = (DatasetDataSource) dataScans.get(i - 1).getDataSource();
            DatasetDataSource currAqlDataSource = (DatasetDataSource) dataScans.get(i).getDataSource();
            if (!prevAqlDataSource.getDataset().equals(currAqlDataSource.getDataset())) {
                return -1;
            }
        }
        // Remove from the used variables all the primary key vars of this dataset.
        fillPKVars(dataScans.get(i), pkVars);
        usedVars.removeAll(pkVars);
    }
    if (!usedVars.isEmpty()) {
        // The join condition uses variables other than the primary
        // keys from datasource scans of the same dataset.
        return -1;
    }
    // If the unused branch has a selection above its data-scan, that selection filters
    // rows, so removing the branch could change the result; keep the join.
    if (unusedJoinBranchIndex >= 0
            && isSelectionAboveDataScan(opRef.getValue().getInputs().get(unusedJoinBranchIndex))) {
        unusedJoinBranchIndex = -1;
    }
    return unusedJoinBranchIndex;
}
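The net effect: in a self-equi-join on the primary key where the plan above the join consumes variables from only one branch, the other branch is redundant, since joining a dataset with itself on its primary key is the identity. A sketch of the per-branch usage test in isolation, assuming only the Algebricks utilities already used above:

// Sketch: a join branch is removable when none of its live variables
// are consumed by the operators above the join.
static boolean isBranchUnused(ILogicalOperator branch, Set<LogicalVariable> parentsUsedVars)
        throws AlgebricksException {
    Set<LogicalVariable> live = new HashSet<>();
    VariableUtilities.getLiveVariables(branch, live);
    live.retainAll(parentsUsedVars);
    return live.isEmpty();
}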