Use of org.apache.commons.lang3.tuple.Pair in project asterixdb by apache.
The class InlineLeftNtsInSubplanJoinFlatteningVisitor, method visitOrderOperator.
@Override
public ILogicalOperator visitOrderOperator(OrderOperator op, Void arg) throws AlgebricksException {
    boolean underJoin = hasJoinAncestor;
    visitSingleInputOperator(op);
    if (!rewritten || !underJoin) {
        return op;
    }
    // Adjust the ordering if its input operator pipeline has been rewritten.
    List<Pair<IOrder, Mutable<ILogicalExpression>>> orderExprList = new ArrayList<>();
    // Adds keyVars to the prefix of sorting columns.
    for (LogicalVariable liveVar : liveVarsFromSubplanInput) {
        orderExprList.add(new Pair<IOrder, Mutable<ILogicalExpression>>(OrderOperator.ASC_ORDER,
                new MutableObject<ILogicalExpression>(new VariableReferenceExpression(liveVar))));
    }
    orderExprList.addAll(op.getOrderExpressions());
    // Creates an order operator with the new expression list.
    OrderOperator orderOp = new OrderOperator(orderExprList);
    orderOp.getInputs().addAll(op.getInputs());
    context.computeAndSetTypeEnvironmentForOperator(orderOp);
    return orderOp;
}
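The main Pair usage here is building a list of (sort direction, expression) pairs and prepending the subplan's key variables as a sort prefix. The following is a minimal standalone sketch of that pattern, not the AsterixDB API: it uses the commons-lang3 accessors Pair.of/getLeft/getRight and plain strings as hypothetical stand-ins for IOrder and the order expressions.

import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.tuple.Pair;

public class OrderPrefixSketch {
    // Prepend key columns (ascending) in front of the existing sort columns.
    static List<Pair<String, String>> prefixWithKeys(List<String> keyColumns,
            List<Pair<String, String>> existingOrder) {
        List<Pair<String, String>> result = new ArrayList<>();
        for (String key : keyColumns) {
            result.add(Pair.of("ASC", key)); // key columns become the leading sort criteria
        }
        result.addAll(existingOrder);        // original sort columns keep their relative order
        return result;
    }

    public static void main(String[] args) {
        List<Pair<String, String>> order = new ArrayList<>();
        order.add(Pair.of("DESC", "$salary"));
        System.out.println(prefixWithKeys(List.of("$pk"), order));
        // prints [(ASC,$pk), (DESC,$salary)]
    }
}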
Use of org.apache.commons.lang3.tuple.Pair in project asterixdb by apache.
The class IntroduceSelectAccessMethodRule, method intersectAllSecondaryIndexes.
/**
 * Constructs all applicable secondary-index access paths in the given selection plan and
 * intersects them with an INTERSECT operator that feeds a common primary-index search.
 * If only one index is applicable, only that single path is constructed.
 */
private boolean intersectAllSecondaryIndexes(List<Pair<IAccessMethod, Index>> chosenIndexes,
        Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs, IOptimizationContext context)
        throws AlgebricksException {
    Pair<IAccessMethod, Index> chosenIndex = null;
    Optional<Pair<IAccessMethod, Index>> primaryIndex =
            chosenIndexes.stream().filter(pair -> pair.second.isPrimaryIndex()).findFirst();
    if (chosenIndexes.size() == 1) {
        chosenIndex = chosenIndexes.get(0);
    } else if (primaryIndex.isPresent()) {
        // one primary + secondary indexes, choose the primary index directly.
        chosenIndex = primaryIndex.get();
    }
    if (chosenIndex != null) {
        AccessMethodAnalysisContext analysisCtx = analyzedAMs.get(chosenIndex.first);
        return chosenIndex.first.applySelectPlanTransformation(afterSelectRefs, selectRef, subTree,
                chosenIndex.second, analysisCtx, context);
    }
    // Intersect all secondary indexes, and postpone the primary index search.
    Mutable<ILogicalExpression> conditionRef = selectOp.getCondition();
    List<ILogicalOperator> subRoots = new ArrayList<>();
    for (Pair<IAccessMethod, Index> pair : chosenIndexes) {
        AccessMethodAnalysisContext analysisCtx = analyzedAMs.get(pair.first);
        subRoots.add(pair.first.createSecondaryToPrimaryPlan(conditionRef, subTree, null, pair.second, analysisCtx,
                AccessMethodUtils.retainInputs(subTree.getDataSourceVariables(),
                        subTree.getDataSourceRef().getValue(), afterSelectRefs),
                false,
                subTree.getDataSourceRef().getValue().getInputs().get(0).getValue()
                        .getExecutionMode() == ExecutionMode.UNPARTITIONED,
                context));
    }
    // Connect each secondary index utilization plan to a common intersect operator.
    ILogicalOperator primaryUnnestOp = connectAll2ndarySearchPlanWithIntersect(subRoots, context);
    subTree.getDataSourceRef().setValue(primaryUnnestOp);
    return primaryUnnestOp != null;
}
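The index-choice logic above is a small decision over a list of (access method, index) pairs: a single candidate or an available primary index is used directly, otherwise all secondary indexes are intersected. Below is a minimal sketch of that decision under simplified assumptions, with a hypothetical Index record and a String standing in for the access method; it uses the commons-lang3 Pair accessors rather than AsterixDB's first/second fields.

import java.util.List;
import java.util.Optional;
import org.apache.commons.lang3.tuple.Pair;

public class IndexChoiceSketch {
    // Hypothetical stand-in for an index: only the property the selection logic needs.
    record Index(String name, boolean primary) { }

    // A single candidate or an available primary index short-circuits the
    // intersection of secondary indexes (an empty result means "intersect all").
    static Optional<Pair<String, Index>> chooseDirectly(List<Pair<String, Index>> candidates) {
        if (candidates.size() == 1) {
            return Optional.of(candidates.get(0));
        }
        return candidates.stream().filter(p -> p.getRight().primary()).findFirst();
    }

    public static void main(String[] args) {
        List<Pair<String, Index>> candidates = List.of(
                Pair.of("btree-am", new Index("idx_age", false)),
                Pair.of("btree-am", new Index("pk_idx", true)));
        // Prints the primary-index pair; Optional.empty() would mean intersecting secondaries.
        System.out.println(chooseDirectly(candidates));
    }
}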
Use of org.apache.commons.lang3.tuple.Pair in project asterixdb by apache.
The class PushNestedOrderByUnderPreSortedGroupByRule, method rewritePost.
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
    if (op.getOperatorTag() != LogicalOperatorTag.GROUP) {
        return false;
    }
    if (op.getPhysicalOperator() == null) {
        return false;
    }
    AbstractPhysicalOperator pOp = (AbstractPhysicalOperator) op.getPhysicalOperator();
    if (pOp.getOperatorTag() != PhysicalOperatorTag.PRE_CLUSTERED_GROUP_BY) {
        return false;
    }
    GroupByOperator gby = (GroupByOperator) op;
    ILogicalPlan plan = gby.getNestedPlans().get(0);
    AbstractLogicalOperator op1 = (AbstractLogicalOperator) plan.getRoots().get(0).getValue();
    if (op1.getOperatorTag() != LogicalOperatorTag.AGGREGATE) {
        return false;
    }
    Mutable<ILogicalOperator> opRef2 = op1.getInputs().get(0);
    AbstractLogicalOperator op2 = (AbstractLogicalOperator) opRef2.getValue();
    if (op2.getOperatorTag() != LogicalOperatorTag.ORDER) {
        return false;
    }
    OrderOperator order1 = (OrderOperator) op2;
    if (!isIndependentFromChildren(order1)) {
        return false;
    }
    AbstractPhysicalOperator pOrder1 = (AbstractPhysicalOperator) op2.getPhysicalOperator();
    if (pOrder1.getOperatorTag() != PhysicalOperatorTag.STABLE_SORT
            && pOrder1.getOperatorTag() != PhysicalOperatorTag.IN_MEMORY_STABLE_SORT) {
        return false;
    }
    // StableSortPOperator sort1 = (StableSortPOperator) pOrder1;
    AbstractLogicalOperator op3 = (AbstractLogicalOperator) op.getInputs().get(0).getValue();
    if (op3.getOperatorTag() != LogicalOperatorTag.ORDER) {
        return false;
    }
    AbstractPhysicalOperator pOp3 = (AbstractPhysicalOperator) op3.getPhysicalOperator();
    if (pOp3.getOperatorTag() != PhysicalOperatorTag.STABLE_SORT) {
        return false;
    }
    OrderOperator order2 = (OrderOperator) op3;
    StableSortPOperator sort2 = (StableSortPOperator) pOp3;
    // int k = 0;
    for (Pair<IOrder, Mutable<ILogicalExpression>> oe : order1.getOrderExpressions()) {
        order2.getOrderExpressions().add(oe);
        // sortColumns[n2 + k] = sort1.getSortColumns()[k];
        // ++k;
    }
    // sort2.setSortColumns(sortColumns);
    sort2.computeDeliveredProperties(order2, null);
    // remove order1
    ILogicalOperator underOrder1 = order1.getInputs().get(0).getValue();
    opRef2.setValue(underOrder1);
    return true;
}
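The Pair-related step in this rule is appending the nested ORDER operator's (direction, expression) pairs onto the outer ORDER below the group-by, after which the nested sort can be removed. A minimal sketch of that list merge, using hypothetical (direction, column) string pairs instead of the Algebricks operator model:

import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.tuple.Pair;

public class PushNestedOrderSketch {
    public static void main(String[] args) {
        // Hypothetical (direction, column) pairs for the outer and nested ORDER operators.
        List<Pair<String, String>> outerOrder = new ArrayList<>(List.of(Pair.of("ASC", "$gbyKey")));
        List<Pair<String, String>> nestedOrder = List.of(Pair.of("DESC", "$ts"));

        // The rewrite appends the nested sort keys after the group-by sort keys, so a
        // single stable sort below the group-by produces both orderings at once.
        outerOrder.addAll(nestedOrder);
        System.out.println(outerOrder); // [(ASC,$gbyKey), (DESC,$ts)]
        // The nested ORDER operator is then spliced out by pointing its parent's
        // input reference at the operator underneath it.
    }
}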
Use of org.apache.commons.lang3.tuple.Pair in project asterixdb by apache.
The class FeedOperations, method combineIntakeCollectJobs.
private static JobSpecification combineIntakeCollectJobs(MetadataProvider metadataProvider, Feed feed,
        JobSpecification intakeJob, List<JobSpecification> jobsList, List<FeedConnection> feedConnections,
        String[] intakeLocations) throws AlgebricksException, HyracksDataException {
    JobSpecification jobSpec = new JobSpecification(intakeJob.getFrameSize());
    // copy ingestor
    FeedIntakeOperatorDescriptor firstOp =
            (FeedIntakeOperatorDescriptor) intakeJob.getOperatorMap().get(new OperatorDescriptorId(0));
    FeedIntakeOperatorDescriptor ingestionOp;
    if (firstOp.getAdaptorFactory() == null) {
        ingestionOp = new FeedIntakeOperatorDescriptor(jobSpec, feed, firstOp.getAdaptorLibraryName(),
                firstOp.getAdaptorFactoryClassName(), firstOp.getAdapterOutputType(), firstOp.getPolicyAccessor(),
                firstOp.getOutputRecordDescriptors()[0]);
    } else {
        ingestionOp = new FeedIntakeOperatorDescriptor(jobSpec, feed, firstOp.getAdaptorFactory(),
                firstOp.getAdapterOutputType(), firstOp.getPolicyAccessor(), firstOp.getOutputRecordDescriptors()[0]);
    }
    // create replicator
    ReplicateOperatorDescriptor replicateOp =
            new ReplicateOperatorDescriptor(jobSpec, ingestionOp.getOutputRecordDescriptors()[0], jobsList.size());
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), ingestionOp, 0, replicateOp, 0);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, ingestionOp, intakeLocations);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, replicateOp, intakeLocations);
    // Loop over the jobs to copy operators and connections
    Map<OperatorDescriptorId, OperatorDescriptorId> operatorIdMapping = new HashMap<>();
    Map<ConnectorDescriptorId, ConnectorDescriptorId> connectorIdMapping = new HashMap<>();
    Map<OperatorDescriptorId, List<LocationConstraint>> operatorLocations = new HashMap<>();
    Map<OperatorDescriptorId, Integer> operatorCounts = new HashMap<>();
    List<JobId> jobIds = new ArrayList<>();
    FeedMetaOperatorDescriptor metaOp;
    for (int iter1 = 0; iter1 < jobsList.size(); iter1++) {
        FeedConnection curFeedConnection = feedConnections.get(iter1);
        JobSpecification subJob = jobsList.get(iter1);
        operatorIdMapping.clear();
        Map<OperatorDescriptorId, IOperatorDescriptor> operatorsMap = subJob.getOperatorMap();
        String datasetName = feedConnections.get(iter1).getDatasetName();
        FeedConnectionId feedConnectionId = new FeedConnectionId(ingestionOp.getEntityId(), datasetName);
        FeedPolicyEntity feedPolicyEntity = FeedMetadataUtil.validateIfPolicyExists(
                curFeedConnection.getDataverseName(), curFeedConnection.getPolicyName(),
                metadataProvider.getMetadataTxnContext());
        for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> entry : operatorsMap.entrySet()) {
            IOperatorDescriptor opDesc = entry.getValue();
            OperatorDescriptorId oldId = opDesc.getOperatorId();
            OperatorDescriptorId opId = null;
            if (opDesc instanceof LSMTreeInsertDeleteOperatorDescriptor
                    && ((LSMTreeInsertDeleteOperatorDescriptor) opDesc).isPrimary()) {
                metaOp = new FeedMetaOperatorDescriptor(jobSpec, feedConnectionId, opDesc,
                        feedPolicyEntity.getProperties(), FeedRuntimeType.STORE);
                opId = metaOp.getOperatorId();
                opDesc.setOperatorId(opId);
            } else {
                if (opDesc instanceof AlgebricksMetaOperatorDescriptor) {
                    AlgebricksMetaOperatorDescriptor algOp = (AlgebricksMetaOperatorDescriptor) opDesc;
                    IPushRuntimeFactory[] runtimeFactories = algOp.getPipeline().getRuntimeFactories();
                    // Tweak AssignOp to work with messages
                    if (runtimeFactories[0] instanceof AssignRuntimeFactory && runtimeFactories.length > 1) {
                        IConnectorDescriptor connectorDesc =
                                subJob.getOperatorInputMap().get(opDesc.getOperatorId()).get(0);
                        // anything on the network interface needs to be message compatible
                        if (connectorDesc instanceof MToNPartitioningConnectorDescriptor) {
                            metaOp = new FeedMetaOperatorDescriptor(jobSpec, feedConnectionId, opDesc,
                                    feedPolicyEntity.getProperties(), FeedRuntimeType.COMPUTE);
                            opId = metaOp.getOperatorId();
                            opDesc.setOperatorId(opId);
                        }
                    }
                }
                if (opId == null) {
                    opId = jobSpec.createOperatorDescriptorId(opDesc);
                }
            }
            operatorIdMapping.put(oldId, opId);
        }
        // copy connectors
        connectorIdMapping.clear();
        for (Entry<ConnectorDescriptorId, IConnectorDescriptor> entry : subJob.getConnectorMap().entrySet()) {
            IConnectorDescriptor connDesc = entry.getValue();
            ConnectorDescriptorId newConnId;
            if (connDesc instanceof MToNPartitioningConnectorDescriptor) {
                MToNPartitioningConnectorDescriptor m2nConn = (MToNPartitioningConnectorDescriptor) connDesc;
                connDesc = new MToNPartitioningWithMessageConnectorDescriptor(jobSpec,
                        m2nConn.getTuplePartitionComputerFactory());
                newConnId = connDesc.getConnectorId();
            } else {
                newConnId = jobSpec.createConnectorDescriptor(connDesc);
            }
            connectorIdMapping.put(entry.getKey(), newConnId);
        }
        // make connections between operators
        for (Entry<ConnectorDescriptorId, Pair<Pair<IOperatorDescriptor, Integer>, Pair<IOperatorDescriptor, Integer>>> entry : subJob
                .getConnectorOperatorMap().entrySet()) {
            ConnectorDescriptorId newId = connectorIdMapping.get(entry.getKey());
            IConnectorDescriptor connDesc = jobSpec.getConnectorMap().get(newId);
            Pair<IOperatorDescriptor, Integer> leftOp = entry.getValue().getLeft();
            Pair<IOperatorDescriptor, Integer> rightOp = entry.getValue().getRight();
            IOperatorDescriptor leftOpDesc = jobSpec.getOperatorMap().get(leftOp.getLeft().getOperatorId());
            IOperatorDescriptor rightOpDesc = jobSpec.getOperatorMap().get(rightOp.getLeft().getOperatorId());
            if (leftOp.getLeft() instanceof FeedCollectOperatorDescriptor) {
                jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), replicateOp, iter1, leftOpDesc,
                        leftOp.getRight());
            }
            jobSpec.connect(connDesc, leftOpDesc, leftOp.getRight(), rightOpDesc, rightOp.getRight());
        }
        // prepare for setting partition constraints
        operatorLocations.clear();
        operatorCounts.clear();
        for (Constraint constraint : subJob.getUserConstraints()) {
            LValueConstraintExpression lexpr = constraint.getLValue();
            ConstraintExpression cexpr = constraint.getRValue();
            OperatorDescriptorId opId;
            switch (lexpr.getTag()) {
                case PARTITION_COUNT:
                    opId = ((PartitionCountExpression) lexpr).getOperatorDescriptorId();
                    operatorCounts.put(operatorIdMapping.get(opId), (int) ((ConstantExpression) cexpr).getValue());
                    break;
                case PARTITION_LOCATION:
                    opId = ((PartitionLocationExpression) lexpr).getOperatorDescriptorId();
                    IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(operatorIdMapping.get(opId));
                    List<LocationConstraint> locations = operatorLocations.get(opDesc.getOperatorId());
                    if (locations == null) {
                        locations = new ArrayList<>();
                        operatorLocations.put(opDesc.getOperatorId(), locations);
                    }
                    String location = (String) ((ConstantExpression) cexpr).getValue();
                    LocationConstraint lc =
                            new LocationConstraint(location, ((PartitionLocationExpression) lexpr).getPartition());
                    locations.add(lc);
                    break;
                default:
                    break;
            }
        }
        // set absolute location constraints
        for (Entry<OperatorDescriptorId, List<LocationConstraint>> entry : operatorLocations.entrySet()) {
            IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(entry.getKey());
            // why do we need to sort?
            Collections.sort(entry.getValue(), (LocationConstraint o1, LocationConstraint o2) -> {
                return o1.partition - o2.partition;
            });
            String[] locations = new String[entry.getValue().size()];
            for (int j = 0; j < locations.length; ++j) {
                locations[j] = entry.getValue().get(j).location;
            }
            PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, opDesc, locations);
        }
        // set count constraints
        for (Entry<OperatorDescriptorId, Integer> entry : operatorCounts.entrySet()) {
            IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(entry.getKey());
            if (!operatorLocations.keySet().contains(entry.getKey())) {
                PartitionConstraintHelper.addPartitionCountConstraint(jobSpec, opDesc, entry.getValue());
            }
        }
        // roots
        for (OperatorDescriptorId root : subJob.getRoots()) {
            jobSpec.addRoot(jobSpec.getOperatorMap().get(operatorIdMapping.get(root)));
        }
        jobIds.add(((JobEventListenerFactory) subJob.getJobletEventListenerFactory()).getJobId());
    }
    // jobEventListenerFactory
    jobSpec.setJobletEventListenerFactory(new MultiTransactionJobletEventListenerFactory(jobIds, true));
    // useConnectorSchedulingPolicy
    jobSpec.setUseConnectorPolicyForScheduling(jobsList.get(0).isUseConnectorPolicyForScheduling());
    // connectorAssignmentPolicy
    jobSpec.setConnectorPolicyAssignmentPolicy(jobsList.get(0).getConnectorPolicyAssignmentPolicy());
    return jobSpec;
}
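The nested Pair type in the connector-operator map describes each data flow edge: the left pair is a (producer operator, output port) endpoint and the right pair is a (consumer operator, input port) endpoint. Here is a small standalone sketch of reading such a map, using a hypothetical Op record and string connector ids in place of the Hyracks descriptor classes:

import java.util.Map;
import org.apache.commons.lang3.tuple.Pair;

public class ConnectorMapSketch {
    // Hypothetical stand-in for an operator descriptor.
    record Op(String name) { }

    public static void main(String[] args) {
        // Each connector maps to a pair of (operator, port) endpoints:
        // the left pair is the producer output, the right pair the consumer input.
        Map<String, Pair<Pair<Op, Integer>, Pair<Op, Integer>>> connectorOperatorMap = Map.of(
                "conn-1", Pair.of(Pair.of(new Op("collect"), 0), Pair.of(new Op("assign"), 0)));

        for (var entry : connectorOperatorMap.entrySet()) {
            Pair<Op, Integer> source = entry.getValue().getLeft();
            Pair<Op, Integer> target = entry.getValue().getRight();
            System.out.printf("%s: %s[out %d] -> %s[in %d]%n", entry.getKey(),
                    source.getLeft().name(), source.getRight(),
                    target.getLeft().name(), target.getRight());
        }
    }
}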
Use of org.apache.commons.lang3.tuple.Pair in project asterixdb by apache.
The class Dataset, method drop.
/**
* Drop this dataset
*
* @param metadataProvider
* metadata provider that can be used to get metadata info and runtimes
* @param mdTxnCtx
* the transaction context
* @param jobsToExecute
* a list of jobs to be executed as part of the drop operation
* @param bActiveTxn
* whether the metadata transaction is ongoing
* @param progress
* a mutable progress state used for error handling during the drop operation
* @param hcc
* a client connection to hyracks master for job execution
* @throws Exception
* if an error occurs during the drop process or if the dataset cannot be dropped for any reason
*/
public void drop(MetadataProvider metadataProvider, MutableObject<MetadataTransactionContext> mdTxnCtx,
        List<JobSpecification> jobsToExecute, MutableBoolean bActiveTxn, MutableObject<ProgressState> progress,
        IHyracksClientConnection hcc, boolean dropCorrespondingNodeGroup) throws Exception {
    Map<FeedConnectionId, Pair<JobSpecification, Boolean>> disconnectJobList = new HashMap<>();
    if (getDatasetType() == DatasetType.INTERNAL) {
        // prepare job spec(s) that would disconnect any active feeds involving the dataset.
        ActiveLifecycleListener activeListener =
                (ActiveLifecycleListener) metadataProvider.getApplicationContext().getActiveLifecycleListener();
        IActiveEntityEventsListener[] activeListeners = activeListener.getNotificationHandler().getEventListeners();
        for (IActiveEntityEventsListener listener : activeListeners) {
            if (listener.isEntityUsingDataset(this)) {
                throw new CompilationException(ErrorCode.COMPILATION_CANT_DROP_ACTIVE_DATASET,
                        RecordUtil.toFullyQualifiedName(dataverseName, datasetName),
                        listener.getEntityId().toString());
            }
        }
        // #. prepare jobs to drop the dataset and the indexes in NC
        List<Index> indexes =
                MetadataManager.INSTANCE.getDatasetIndexes(mdTxnCtx.getValue(), dataverseName, datasetName);
        for (int j = 0; j < indexes.size(); j++) {
            if (indexes.get(j).isSecondaryIndex()) {
                jobsToExecute.add(IndexUtil.buildDropIndexJobSpec(indexes.get(j), metadataProvider, this));
            }
        }
        jobsToExecute.add(DatasetUtil.dropDatasetJobSpec(this, metadataProvider));
        // #. mark the existing dataset as PendingDropOp
        MetadataManager.INSTANCE.dropDataset(mdTxnCtx.getValue(), dataverseName, datasetName);
        MetadataManager.INSTANCE.addDataset(mdTxnCtx.getValue(),
                new Dataset(dataverseName, datasetName, getItemTypeDataverseName(), getItemTypeName(),
                        getMetaItemTypeDataverseName(), getMetaItemTypeName(), getNodeGroupName(),
                        getCompactionPolicy(), getCompactionPolicyProperties(), getDatasetDetails(), getHints(),
                        getDatasetType(), getDatasetId(), MetadataUtil.PENDING_DROP_OP));
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx.getValue());
        bActiveTxn.setValue(false);
        progress.setValue(ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA);
        // #. disconnect the feeds
        for (Pair<JobSpecification, Boolean> p : disconnectJobList.values()) {
            JobUtils.runJob(hcc, p.first, true);
        }
        // #. run the jobs
        for (JobSpecification jobSpec : jobsToExecute) {
            JobUtils.runJob(hcc, jobSpec, true);
        }
        mdTxnCtx.setValue(MetadataManager.INSTANCE.beginTransaction());
        bActiveTxn.setValue(true);
        metadataProvider.setMetadataTxnContext(mdTxnCtx.getValue());
    } else {
        // External dataset
        ExternalDatasetsRegistry.INSTANCE.removeDatasetInfo(this);
        // #. prepare jobs to drop the dataset and the indexes in NC
        List<Index> indexes =
                MetadataManager.INSTANCE.getDatasetIndexes(mdTxnCtx.getValue(), dataverseName, datasetName);
        for (int j = 0; j < indexes.size(); j++) {
            if (ExternalIndexingOperations.isFileIndex(indexes.get(j))) {
                jobsToExecute.add(IndexUtil.buildDropIndexJobSpec(indexes.get(j), metadataProvider, this));
            } else {
                jobsToExecute.add(DatasetUtil.buildDropFilesIndexJobSpec(metadataProvider, this));
            }
        }
        // #. mark the existing dataset as PendingDropOp
        MetadataManager.INSTANCE.dropDataset(mdTxnCtx.getValue(), dataverseName, datasetName);
        MetadataManager.INSTANCE.addDataset(mdTxnCtx.getValue(),
                new Dataset(dataverseName, datasetName, getItemTypeDataverseName(), getItemTypeName(),
                        getNodeGroupName(), getCompactionPolicy(), getCompactionPolicyProperties(),
                        getDatasetDetails(), getHints(), getDatasetType(), getDatasetId(),
                        MetadataUtil.PENDING_DROP_OP));
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx.getValue());
        bActiveTxn.setValue(false);
        progress.setValue(ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA);
        // #. run the jobs
        for (JobSpecification jobSpec : jobsToExecute) {
            JobUtils.runJob(hcc, jobSpec, true);
        }
        if (!indexes.isEmpty()) {
            ExternalDatasetsRegistry.INSTANCE.removeDatasetInfo(this);
        }
        mdTxnCtx.setValue(MetadataManager.INSTANCE.beginTransaction());
        bActiveTxn.setValue(true);
        metadataProvider.setMetadataTxnContext(mdTxnCtx.getValue());
    }
    // #. finally, delete the dataset.
    MetadataManager.INSTANCE.dropDataset(mdTxnCtx.getValue(), dataverseName, datasetName);
    // Drops the associated nodegroup if it is no longer used by any other dataset.
    if (dropCorrespondingNodeGroup) {
        MetadataLockManager.INSTANCE.acquireNodeGroupWriteLock(metadataProvider.getLocks(), nodeGroupName);
        MetadataManager.INSTANCE.dropNodegroup(mdTxnCtx.getValue(), nodeGroupName, true);
    }
}
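In this method the Pair value groups a compiled disconnect job with a boolean flag, keyed by feed connection, and only the job half is executed before the dataset is dropped. A minimal sketch of that map-of-pairs pattern under simplified assumptions, with hypothetical ConnId and Job records and a stub runJob in place of the AsterixDB classes:

import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang3.tuple.Pair;

public class DisconnectJobSketch {
    // Hypothetical stand-ins for a feed connection id and a compiled job specification.
    record ConnId(String feed, String dataset) { }
    record Job(String description) { }

    static void runJob(Job job, boolean waitForCompletion) {
        System.out.println("running " + job.description() + " (wait=" + waitForCompletion + ")");
    }

    public static void main(String[] args) {
        // Collect (job, flag) pairs keyed by connection, then run the job half of each pair.
        Map<ConnId, Pair<Job, Boolean>> disconnectJobs = new HashMap<>();
        disconnectJobs.put(new ConnId("tweetFeed", "Tweets"), Pair.of(new Job("disconnect tweetFeed"), true));
        for (Pair<Job, Boolean> p : disconnectJobs.values()) {
            runJob(p.getLeft(), p.getRight());
        }
    }
}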