Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
The class FeedOperations, method combineIntakeCollectJobs:
private static JobSpecification combineIntakeCollectJobs(MetadataProvider metadataProvider, Feed feed,
        JobSpecification intakeJob, List<JobSpecification> jobsList, List<FeedConnection> feedConnections,
        String[] intakeLocations) throws AlgebricksException, HyracksDataException {
    JobSpecification jobSpec = new JobSpecification(intakeJob.getFrameSize());
    // copy ingestor
    FeedIntakeOperatorDescriptor firstOp =
            (FeedIntakeOperatorDescriptor) intakeJob.getOperatorMap().get(new OperatorDescriptorId(0));
    FeedIntakeOperatorDescriptor ingestionOp;
    if (firstOp.getAdaptorFactory() == null) {
        ingestionOp = new FeedIntakeOperatorDescriptor(jobSpec, feed, firstOp.getAdaptorLibraryName(),
                firstOp.getAdaptorFactoryClassName(), firstOp.getAdapterOutputType(), firstOp.getPolicyAccessor(),
                firstOp.getOutputRecordDescriptors()[0]);
    } else {
        ingestionOp = new FeedIntakeOperatorDescriptor(jobSpec, feed, firstOp.getAdaptorFactory(),
                firstOp.getAdapterOutputType(), firstOp.getPolicyAccessor(),
                firstOp.getOutputRecordDescriptors()[0]);
    }
    // create replicator
    ReplicateOperatorDescriptor replicateOp =
            new ReplicateOperatorDescriptor(jobSpec, ingestionOp.getOutputRecordDescriptors()[0], jobsList.size());
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), ingestionOp, 0, replicateOp, 0);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, ingestionOp, intakeLocations);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, replicateOp, intakeLocations);
    // Loop over the jobs to copy operators and connections
    Map<OperatorDescriptorId, OperatorDescriptorId> operatorIdMapping = new HashMap<>();
    Map<ConnectorDescriptorId, ConnectorDescriptorId> connectorIdMapping = new HashMap<>();
    Map<OperatorDescriptorId, List<LocationConstraint>> operatorLocations = new HashMap<>();
    Map<OperatorDescriptorId, Integer> operatorCounts = new HashMap<>();
    List<JobId> jobIds = new ArrayList<>();
    FeedMetaOperatorDescriptor metaOp;
    for (int iter1 = 0; iter1 < jobsList.size(); iter1++) {
        FeedConnection curFeedConnection = feedConnections.get(iter1);
        JobSpecification subJob = jobsList.get(iter1);
        operatorIdMapping.clear();
        Map<OperatorDescriptorId, IOperatorDescriptor> operatorsMap = subJob.getOperatorMap();
        String datasetName = curFeedConnection.getDatasetName();
        FeedConnectionId feedConnectionId = new FeedConnectionId(ingestionOp.getEntityId(), datasetName);
        FeedPolicyEntity feedPolicyEntity =
                FeedMetadataUtil.validateIfPolicyExists(curFeedConnection.getDataverseName(),
                        curFeedConnection.getPolicyName(), metadataProvider.getMetadataTxnContext());
        for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> entry : operatorsMap.entrySet()) {
            IOperatorDescriptor opDesc = entry.getValue();
            OperatorDescriptorId oldId = opDesc.getOperatorId();
            OperatorDescriptorId opId = null;
            if (opDesc instanceof LSMTreeInsertDeleteOperatorDescriptor
                    && ((LSMTreeInsertDeleteOperatorDescriptor) opDesc).isPrimary()) {
                // wrap the primary-index insert/delete in a feed meta operator (STORE stage)
                metaOp = new FeedMetaOperatorDescriptor(jobSpec, feedConnectionId, opDesc,
                        feedPolicyEntity.getProperties(), FeedRuntimeType.STORE);
                opId = metaOp.getOperatorId();
                opDesc.setOperatorId(opId);
            } else {
                if (opDesc instanceof AlgebricksMetaOperatorDescriptor) {
                    AlgebricksMetaOperatorDescriptor algOp = (AlgebricksMetaOperatorDescriptor) opDesc;
                    IPushRuntimeFactory[] runtimeFactories = algOp.getPipeline().getRuntimeFactories();
                    // Tweak AssignOp to work with messages
                    if (runtimeFactories[0] instanceof AssignRuntimeFactory && runtimeFactories.length > 1) {
                        IConnectorDescriptor connectorDesc =
                                subJob.getOperatorInputMap().get(opDesc.getOperatorId()).get(0);
                        // anything on the network interface needs to be message compatible
                        if (connectorDesc instanceof MToNPartitioningConnectorDescriptor) {
                            metaOp = new FeedMetaOperatorDescriptor(jobSpec, feedConnectionId, opDesc,
                                    feedPolicyEntity.getProperties(), FeedRuntimeType.COMPUTE);
                            opId = metaOp.getOperatorId();
                            opDesc.setOperatorId(opId);
                        }
                    }
                }
                if (opId == null) {
                    opId = jobSpec.createOperatorDescriptorId(opDesc);
                }
            }
            operatorIdMapping.put(oldId, opId);
        }
        // copy connectors
        connectorIdMapping.clear();
        for (Entry<ConnectorDescriptorId, IConnectorDescriptor> entry : subJob.getConnectorMap().entrySet()) {
            IConnectorDescriptor connDesc = entry.getValue();
            ConnectorDescriptorId newConnId;
            if (connDesc instanceof MToNPartitioningConnectorDescriptor) {
                // replace plain M:N partitioners with their message-aware variant
                MToNPartitioningConnectorDescriptor m2nConn = (MToNPartitioningConnectorDescriptor) connDesc;
                connDesc = new MToNPartitioningWithMessageConnectorDescriptor(jobSpec,
                        m2nConn.getTuplePartitionComputerFactory());
                newConnId = connDesc.getConnectorId();
            } else {
                newConnId = jobSpec.createConnectorDescriptor(connDesc);
            }
            connectorIdMapping.put(entry.getKey(), newConnId);
        }
        // make connections between operators
        for (Entry<ConnectorDescriptorId, Pair<Pair<IOperatorDescriptor, Integer>, Pair<IOperatorDescriptor, Integer>>> entry : subJob
                .getConnectorOperatorMap().entrySet()) {
            ConnectorDescriptorId newId = connectorIdMapping.get(entry.getKey());
            IConnectorDescriptor connDesc = jobSpec.getConnectorMap().get(newId);
            Pair<IOperatorDescriptor, Integer> leftOp = entry.getValue().getLeft();
            Pair<IOperatorDescriptor, Integer> rightOp = entry.getValue().getRight();
            IOperatorDescriptor leftOpDesc = jobSpec.getOperatorMap().get(leftOp.getLeft().getOperatorId());
            IOperatorDescriptor rightOpDesc = jobSpec.getOperatorMap().get(rightOp.getLeft().getOperatorId());
            if (leftOp.getLeft() instanceof FeedCollectOperatorDescriptor) {
                // attach branch iter1 of the replicator to this job's collect operator
                jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), replicateOp, iter1, leftOpDesc,
                        leftOp.getRight());
            }
            jobSpec.connect(connDesc, leftOpDesc, leftOp.getRight(), rightOpDesc, rightOp.getRight());
        }
        // prepare for setting partition constraints
        operatorLocations.clear();
        operatorCounts.clear();
        for (Constraint constraint : subJob.getUserConstraints()) {
            LValueConstraintExpression lexpr = constraint.getLValue();
            ConstraintExpression cexpr = constraint.getRValue();
            OperatorDescriptorId opId;
            switch (lexpr.getTag()) {
                case PARTITION_COUNT:
                    opId = ((PartitionCountExpression) lexpr).getOperatorDescriptorId();
                    operatorCounts.put(operatorIdMapping.get(opId), (int) ((ConstantExpression) cexpr).getValue());
                    break;
                case PARTITION_LOCATION:
                    opId = ((PartitionLocationExpression) lexpr).getOperatorDescriptorId();
                    IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(operatorIdMapping.get(opId));
                    List<LocationConstraint> locations = operatorLocations.get(opDesc.getOperatorId());
                    if (locations == null) {
                        locations = new ArrayList<>();
                        operatorLocations.put(opDesc.getOperatorId(), locations);
                    }
                    String location = (String) ((ConstantExpression) cexpr).getValue();
                    LocationConstraint lc = new LocationConstraint(location,
                            ((PartitionLocationExpression) lexpr).getPartition());
                    locations.add(lc);
                    break;
                default:
                    break;
            }
        }
        // set absolute location constraints
        for (Entry<OperatorDescriptorId, List<LocationConstraint>> entry : operatorLocations.entrySet()) {
            IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(entry.getKey());
            // sort by partition number so that index i of the locations array is the
            // location of partition i (the location constraint is positional)
            Collections.sort(entry.getValue(), (o1, o2) -> o1.partition - o2.partition);
            String[] locations = new String[entry.getValue().size()];
            for (int j = 0; j < locations.length; ++j) {
                locations[j] = entry.getValue().get(j).location;
            }
            PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, opDesc, locations);
        }
        // set count constraints only for operators without a location constraint
        for (Entry<OperatorDescriptorId, Integer> entry : operatorCounts.entrySet()) {
            IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(entry.getKey());
            if (!operatorLocations.containsKey(entry.getKey())) {
                PartitionConstraintHelper.addPartitionCountConstraint(jobSpec, opDesc, entry.getValue());
            }
        }
        // roots
        for (OperatorDescriptorId root : subJob.getRoots()) {
            jobSpec.addRoot(jobSpec.getOperatorMap().get(operatorIdMapping.get(root)));
        }
        jobIds.add(((JobEventListenerFactory) subJob.getJobletEventListenerFactory()).getJobId());
    }
    // jobEventListenerFactory
    jobSpec.setJobletEventListenerFactory(new MultiTransactionJobletEventListenerFactory(jobIds, true));
    // useConnectorSchedulingPolicy
    jobSpec.setUseConnectorPolicyForScheduling(jobsList.get(0).isUseConnectorPolicyForScheduling());
    // connectorAssignmentPolicy
    jobSpec.setConnectorPolicyAssignmentPolicy(jobsList.get(0).getConnectorPolicyAssignmentPolicy());
    return jobSpec;
}
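Stripped of the id remapping and constraint copying, the dataflow change this method makes is a fan-out: the intake feeds a replicate operator, and branch i of the replicator feeds the i-th collect pipeline. A minimal sketch of that wiring, assuming the source and consumer operators are already created on the same spec (fanOut is a hypothetical helper written for illustration, not AsterixDB code):

import org.apache.hyracks.api.dataflow.IOperatorDescriptor;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.api.job.JobSpecification;
import org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
import org.apache.hyracks.dataflow.std.misc.ReplicateOperatorDescriptor;

static ReplicateOperatorDescriptor fanOut(JobSpecification spec, IOperatorDescriptor source,
        RecordDescriptor sourceRecDesc, IOperatorDescriptor[] consumers) {
    // one output branch per consumer pipeline
    ReplicateOperatorDescriptor replicate =
            new ReplicateOperatorDescriptor(spec, sourceRecDesc, consumers.length);
    spec.connect(new OneToOneConnectorDescriptor(spec), source, 0, replicate, 0);
    for (int i = 0; i < consumers.length; i++) {
        // replicator output branch i feeds input 0 of the i-th consumer
        spec.connect(new OneToOneConnectorDescriptor(spec), replicate, i, consumers[i], 0);
    }
    return replicate;
}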
Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
The class JobBuilder, method setPartitionConstraintsTopdown:
private void setPartitionConstraintsTopdown(OperatorDescriptorId opId,
        Map<IConnectorDescriptor, TargetConstraint> tgtConstraints, IOperatorDescriptor parentOp) {
    List<IConnectorDescriptor> opInputs = jobSpec.getOperatorInputMap().get(opId);
    AlgebricksPartitionConstraint opConstraint;
    IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(opId);
    if (opInputs != null) {
        for (IConnectorDescriptor conn : opInputs) {
            ConnectorDescriptorId cid = conn.getConnectorId();
            org.apache.commons.lang3.tuple.Pair<org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>, org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>> p =
                    jobSpec.getConnectorOperatorMap().get(cid);
            IOperatorDescriptor src = p.getLeft().getLeft();
            TargetConstraint constraint = tgtConstraints.get(conn);
            if (constraint == TargetConstraint.SAME_COUNT) {
                // copy this operator's constraint to a source that has none yet
                opConstraint = partitionConstraintMap.get(opDesc);
                if (partitionConstraintMap.get(src) == null && opConstraint != null) {
                    partitionConstraintMap.put(src, opConstraint);
                    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, src, opConstraint);
                }
            }
            // recurse into the source operator (post-order DFS)
            setPartitionConstraintsTopdown(src.getOperatorId(), tgtConstraints, opDesc);
        }
    }
}
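The recursion walks from each operator to its sources and, on SAME_COUNT connectors, copies the consumer's partition constraint to any source that does not have one yet. A stripped-down model of that propagation, using string operator ids and plain maps instead of the Hyracks descriptor types, and treating every edge as SAME_COUNT for brevity (all names here are illustrative):

import java.util.List;
import java.util.Map;

// inputs maps an operator id to the ids of its source operators;
// partitionCount stands in for partitionConstraintMap.
static void propagate(String opId, Map<String, List<String>> inputs, Map<String, Integer> partitionCount) {
    for (String src : inputs.getOrDefault(opId, List.of())) {
        Integer here = partitionCount.get(opId);
        if (here != null) {
            partitionCount.putIfAbsent(src, here); // copy the consumer's constraint upstream
        }
        propagate(src, inputs, partitionCount); // continue toward the leaves
    }
}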
Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
The class JobBuilder, method buildSpec:
@Override
public void buildSpec(List<ILogicalOperator> roots) throws AlgebricksException {
    buildAsterixComponents();
    Map<IConnectorDescriptor, TargetConstraint> tgtConstraints = setupConnectors();
    for (ILogicalOperator r : roots) {
        IOperatorDescriptor opDesc = findOpDescForAlgebraicOp(r);
        jobSpec.addRoot(opDesc);
    }
    setAllPartitionConstraints(tgtConstraints);
}
Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
The class InsertPipelineExample, method createJob:
private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    // schema of tuples to be generated: 5 fields with string, string, int, int, string;
    // we will use field 2 as the primary key to fill a clustered index
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), // this field will not go into the B-Tree
            new UTF8StringSerializerDeserializer(), // we will use this as payload
            IntegerSerializerDeserializer.INSTANCE, // we will use this field as key
            IntegerSerializerDeserializer.INSTANCE, // we will use this as payload
            new UTF8StringSerializerDeserializer() // we will use this as payload
    });
    // generate numRecords records with field 2 being unique, integer values in
    // [0, 100000], strings with a max length of 10 characters, and random seed 100
    DataGenOperatorDescriptor dataGen =
            new DataGenOperatorDescriptor(spec, recDesc, options.numTuples, 2, 0, 100000, 10, 100);
    // run the data generator on the first node controller given
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, dataGen, splitNCs[0]);
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // prepare insertion into the primary index:
    // tuples to be put into the B-Tree have 4 fields
    int primaryFieldCount = 4;
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
    primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // comparator factories for the primary index
    IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[1];
    primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // the B-Tree expects its key fields at the front of its input tuple:
    // map field 2 of the input tuple to field 0 of the B-Tree tuple, etc.
    int[] primaryFieldPermutation = { 2, 1, 3, 4 };
    IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory =
            new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    // create the primary-index insert operator
    TreeIndexInsertUpdateDeleteOperatorDescriptor primaryInsert =
            new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, recDesc, primaryFieldPermutation,
                    IndexOperation.INSERT, primaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    JobHelper.createPartitionConstraint(spec, primaryInsert, splitNCs);
    // prepare insertion into the secondary index:
    // tuples to be put into the B-Tree have 2 fields
    int secondaryFieldCount = 2;
    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
    secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
    // comparator factories for the secondary index
    IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[2];
    secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // again, the B-Tree expects its key fields at the front of its input tuple
    int[] secondaryFieldPermutation = { 1, 2 };
    IFileSplitProvider secondarySplitProvider =
            JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory =
            new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
    // create the secondary-index insert operator
    TreeIndexInsertUpdateDeleteOperatorDescriptor secondaryInsert =
            new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, recDesc, secondaryFieldPermutation,
                    IndexOperation.INSERT, secondaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    JobHelper.createPartitionConstraint(spec, secondaryInsert, splitNCs);
    // end the insert pipeline at this sink operator
    NullSinkOperatorDescriptor nullSink = new NullSinkOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, nullSink, splitNCs);
    // distribute the records from the datagen via hashing to the insert ops
    IBinaryHashFunctionFactory[] hashFactories = new IBinaryHashFunctionFactory[1];
    hashFactories[0] = PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY);
    IConnectorDescriptor hashConn = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 }, hashFactories));
    // connect the ops
    spec.connect(hashConn, dataGen, 0, primaryInsert, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryInsert, 0, secondaryInsert, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryInsert, 0, nullSink, 0);
    spec.addRoot(nullSink);
    return spec;
}
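A spec like this is only a description of the pipeline; in the Hyracks example drivers it is submitted to a running cluster controller roughly as follows. The host, port, and parseOptions helper below are illustrative assumptions, not part of the snippet above:

import org.apache.hyracks.api.client.HyracksConnection;
import org.apache.hyracks.api.client.IHyracksClientConnection;
import org.apache.hyracks.api.job.JobId;
import org.apache.hyracks.api.job.JobSpecification;

public static void main(String[] args) throws Exception {
    Options options = parseOptions(args); // hypothetical helper: fill Options from the CLI
    JobSpecification job = createJob(options);
    // connect to the cluster controller and run the job (address is illustrative)
    IHyracksClientConnection hcc = new HyracksConnection("localhost", 1098);
    JobId jobId = hcc.startJob(job);
    hcc.waitForCompletion(jobId); // block until the pipeline drains
}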
Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
The class PrimaryIndexBulkLoadExample, method createJob:
private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    // schema of tuples to be generated: 5 fields with string, string, int, int, string;
    // we will use field 2 as the primary key to fill a clustered index
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), // this field will not go into the B-Tree
            new UTF8StringSerializerDeserializer(), // we will use this as payload
            IntegerSerializerDeserializer.INSTANCE, // we will use this field as key
            IntegerSerializerDeserializer.INSTANCE, // we will use this as payload
            new UTF8StringSerializerDeserializer() // we will use this as payload
    });
    // generate numRecords records with field 2 being unique, integer values in
    // [0, 100000], strings with a max length of 10 characters, and random seed 50
    DataGenOperatorDescriptor dataGen =
            new DataGenOperatorDescriptor(spec, recDesc, options.numTuples, 2, 0, 100000, 10, 50);
    // run the data generator on the first node controller given
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, dataGen, splitNCs[0]);
    // sort the tuples as preparation for bulk load: sort on field 2
    int[] sortFields = { 2 };
    // comparators for the sort fields
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[1];
    comparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    ExternalSortOperatorDescriptor sorter =
            new ExternalSortOperatorDescriptor(spec, options.sbSize, sortFields, comparatorFactories, recDesc);
    JobHelper.createPartitionConstraint(spec, sorter, splitNCs);
    // tuples to be put into the B-Tree have 4 fields
    int fieldCount = 4;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = IntegerPointable.TYPE_TRAITS;
    typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    typeTraits[2] = IntegerPointable.TYPE_TRAITS;
    typeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // create providers for the B-Tree
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // the B-Tree expects its key fields at the front of its input tuple:
    // map field 2 of the input tuple to field 0 of the B-Tree tuple, etc.
    int[] fieldPermutation = { 2, 1, 3, 4 };
    IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.btreeName);
    IIndexDataflowHelperFactory dataflowHelperFactory =
            new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
    TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, recDesc,
            fieldPermutation, 0.7f, false, 1000L, true, dataflowHelperFactory);
    JobHelper.createPartitionConstraint(spec, btreeBulkLoad, splitNCs);
    // distribute the records from the datagen via hashing to the bulk load ops
    IBinaryHashFunctionFactory[] hashFactories = new IBinaryHashFunctionFactory[1];
    hashFactories[0] = PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY);
    IConnectorDescriptor hashConn = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 }, hashFactories));
    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, nsOpDesc, splitNCs);
    spec.connect(hashConn, dataGen, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    return spec;
}
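Both examples rely on fieldPermutation to move the key to the front of the B-Tree tuple. A plain-Java illustration of what { 2, 1, 3, 4 } does to one input tuple (ordinary arrays here, not the Hyracks frame/tuple API; the values are made up):

Object[] input = { "f0", "payload", 42, 7, "more-payload" }; // the dataGen schema above
int[] fieldPermutation = { 2, 1, 3, 4 };                     // field 0 is dropped
Object[] btreeTuple = new Object[fieldPermutation.length];
for (int i = 0; i < fieldPermutation.length; i++) {
    btreeTuple[i] = input[fieldPermutation[i]];
}
// btreeTuple is now [42, "payload", 7, "more-payload"]: the key (input field 2) sits at position 0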