Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
The class CountOfCountsTest, method countOfCountsMultiNC. The job scans a word file on NC2, hash-partitions by word to sort and count occurrences of each word across two node controllers, repartitions by the resulting count to compute a count of counts, and broadcasts the final tuples to a result writer on NC1.
@Test
public void countOfCountsMultiNC() throws Exception {
    JobSpecification spec = new JobSpecification();

    // Scan data/words.txt on NC2, producing single-field UTF-8 string tuples.
    FileSplit[] splits =
            new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
    RecordDescriptor desc =
            new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','),
            desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);

    // Sort by word across four partitions (two per NC), then count rows per word.
    InMemorySortOperatorDescriptor sorter = new InMemorySortOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
    RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(
                    new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);

    // Sort by count, then count rows per count value (the "count of counts").
    InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC1_ID, NC2_ID);
    RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(
                    new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc3);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);

    // Write the result set on NC1.
    ResultSetId rsId = new ResultSetId(1);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);

    // Wire the pipeline: hash-partition on the word, then on the count; broadcast to the writer.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, sorter, 0);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, sorter, 0, group, 0);
    IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn3, group, 0, sorter2, 0);
    IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn4, sorter2, 0, group2, 0);
    IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(conn5, group2, 0, printer, 0);

    spec.addRoot(printer);
    runTest(spec);
}
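Here runTest(spec) is supplied by the test harness. Outside the harness, a JobSpecification built this way would typically be submitted through an IHyracksClientConnection; a minimal sketch, assuming a cluster controller reachable at ccHost:ccPort (both names are illustrative, not part of the test):

    // Sketch only: submit the spec to a running cluster controller.
    IHyracksClientConnection hcc = new HyracksConnection(ccHost, ccPort);
    JobId jobId = hcc.startJob(spec); // schedule the job for execution
    hcc.waitForCompletion(jobId);     // block until every root operator finishes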
Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
The class CountOfCountsTest, method countOfCountsExternalSortMultiNC. The pipeline is identical to countOfCountsMultiNC except that the first sort is an ExternalSortOperatorDescriptor limited to three frames, so it spills sorted runs to disk instead of sorting entirely in memory.
@Test
public void countOfCountsExternalSortMultiNC() throws Exception {
    JobSpecification spec = new JobSpecification();

    // Scan data/words.txt on NC2, producing single-field UTF-8 string tuples.
    FileSplit[] splits =
            new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
    RecordDescriptor desc =
            new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','),
            desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);

    // Sort by word with an external sort under a 3-frame budget, then count rows per word.
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 3, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
    RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(
                    new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);

    // Sort by count, then count rows per count value.
    InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC1_ID, NC2_ID);
    RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(
                    new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc3);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);

    // Write the result set on NC1.
    ResultSetId rsId = new ResultSetId(1);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);

    // Wire the pipeline: hash-partition on the word, then on the count; broadcast to the writer.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, sorter, 0);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, sorter, 0, group, 0);
    IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn3, group, 0, sorter2, 0);
    IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn4, sorter2, 0, group2, 0);
    IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(conn5, group2, 0, printer, 0);

    spec.addRoot(printer);
    runTest(spec);
}
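The only change from countOfCountsMultiNC is the first sorter. Side by side, the two constructions used in these tests (comparators abbreviates the IBinaryComparatorFactory array built above):

    // In-memory sort: each partition's data must fit in memory.
    new InMemorySortOperatorDescriptor(spec, new int[] { 0 }, comparators, desc);
    // External sort: spills sorted runs to disk and merges them, under a 3-frame budget.
    new ExternalSortOperatorDescriptor(spec, 3, new int[] { 0 }, comparators, desc);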
Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
The class AggregationTest, method singleKeyAvgExtGroupTest. The job hash-partitions scanned records on a string key and aggregates each group with an external (spillable) group-by that computes a sum, a count, and an average.
@Test
public void singleKeyAvgExtGroupTest() throws Exception {
    JobSpecification spec = new JobSpecification();

    // splitProvider, tupleParserFactory, and desc are shared fixtures defined elsewhere in the test class.
    FileScanOperatorDescriptor csvScanner =
            new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);

    // Output layout: string key, int sum, int count, float average.
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
            IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
    int[] keyFields = new int[] { 0 };
    int frameLimits = 5;
    int tableSize = 8;
    long fileSize = frameLimits * spec.getFrameSize();

    // External group-by on the string key: the first aggregator factory produces partial
    // results (sum, count, running average), the second merges those partials.
    ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize,
            keyFields, frameLimits,
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new UTF8StringNormalizedKeyComputerFactory(),
            new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                    new IntSumFieldAggregatorFactory(1, false), new CountFieldAggregatorFactory(false),
                    new AvgFieldGroupAggregatorFactory(1, false) }),
            new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                    new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false),
                    new AvgFieldMergeAggregatorFactory(3, false) }),
            outputRec, outputRec,
            new HashSpillableTableFactory(
                    new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);

    // Hash-partition input rows by the grouping key before they reach the grouper.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, grouper, 0);

    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgExtGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);

    spec.addRoot(printer);
    runTest(spec);
}
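The two MultiFieldsAggregatorFactory arguments must agree positionally: the first builds the per-partition partial state from raw input rows, while the second merges partials whose fields sit in the intermediate record produced by the first. Annotated, the pairing above reads (a restatement of the constructor arguments, not additional API):

    // Partial step, applied to raw input rows:
    //   int sum of field 1, row count, running-average state over field 1.
    new IntSumFieldAggregatorFactory(1, false), new CountFieldAggregatorFactory(false),
            new AvgFieldGroupAggregatorFactory(1, false)
    // Merge step, applied to partial records (key in field 0):
    //   re-sum field 1, sum the partial counts in field 2, merge the average state in field 3.
    new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false),
            new AvgFieldMergeAggregatorFactory(3, false)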
Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
The class RebalanceUtil, method populateDataToRebalanceTarget.
// Populates data from the source dataset into the rebalance target dataset.
private static void populateDataToRebalanceTarget(Dataset source, Dataset target,
        MetadataProvider metadataProvider, IHyracksClientConnection hcc) throws Exception {
    JobSpecification spec = new JobSpecification();
    JobId jobId = JobIdFactory.generateJobId();
    JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(jobId, true);
    spec.setJobletEventListenerFactory(jobEventListenerFactory);

    // The pipeline starter.
    IOperatorDescriptor starter = DatasetUtil.createDummyKeyProviderOp(spec, source, metadataProvider);
    // Creates the primary index scan op.
    IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, source, jobId);
    // Creates the primary index upsert op for the rebalance target.
    IOperatorDescriptor upsertOp = createPrimaryIndexUpsertOp(spec, metadataProvider, source, target);
    // The final commit operator.
    IOperatorDescriptor commitOp = createUpsertCommitOp(spec, metadataProvider, jobId, target);

    // Connects empty-tuple-source and scan.
    spec.connect(new OneToOneConnectorDescriptor(spec), starter, 0, primaryScanOp, 0);
    // Connects scan and upsert, hash-partitioning on the target's primary keys.
    int numKeys = target.getPrimaryKeys().size();
    int[] keys = IntStream.range(0, numKeys).toArray();
    IConnectorDescriptor connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(keys, target.getPrimaryHashFunctionFactories(metadataProvider)));
    spec.connect(connectorDescriptor, primaryScanOp, 0, upsertOp, 0);
    // Connects upsert and commit.
    spec.connect(new OneToOneConnectorDescriptor(spec), upsertOp, 0, commitOp, 0);

    // Executes the job.
    JobUtils.runJob(hcc, spec, true);
}
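All of these examples lean on the same wiring primitive: JobSpecification.connect binds one output port of a producer operator to one input port of a consumer through a connector. The general shape, with hypothetical operator and port names:

    // OneToOneConnectorDescriptor keeps partitions aligned; MToNPartitioningConnectorDescriptor
    // re-shuffles tuples by a hash of key fields; MToNBroadcastConnectorDescriptor copies every
    // tuple to all consumer partitions.
    spec.connect(conn, producerOp, producerPort, consumerOp, consumerPort);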
Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
The class ActivityClusterGraphRewriter, method rewriteIntraActivityCluster.
/**
 * Rewrites an activity cluster internally.
 *
 * @param ac
 *            the activity cluster to be rewritten
 * @param invertedActivitySuperActivityMap
 *            map from each activity to the super activity that contains it
 */
private void rewriteIntraActivityCluster(ActivityCluster ac,
        Map<IActivity, SuperActivity> invertedActivitySuperActivityMap) {
    Map<ActivityId, IActivity> activities = ac.getActivityMap();
    Map<ActivityId, List<IConnectorDescriptor>> activityInputMap = ac.getActivityInputMap();
    Map<ActivityId, List<IConnectorDescriptor>> activityOutputMap = ac.getActivityOutputMap();
    Map<ConnectorDescriptorId, Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>>> connectorActivityMap =
            ac.getConnectorActivityMap();
    ActivityClusterGraph acg = ac.getActivityClusterGraph();
    Map<ActivityId, IActivity> startActivities = new HashMap<ActivityId, IActivity>();
    Map<ActivityId, SuperActivity> superActivities = new HashMap<ActivityId, SuperActivity>();
    Map<ActivityId, Queue<IActivity>> toBeExpendedMap = new HashMap<ActivityId, Queue<IActivity>>();
    /**
     * Build the initial super activities
     */
    for (Entry<ActivityId, IActivity> entry : activities.entrySet()) {
        ActivityId activityId = entry.getKey();
        IActivity activity = entry.getValue();
        if (activityInputMap.get(activityId) == null) {
            startActivities.put(activityId, activity);
            /**
             * use the start activity's id as the id of the super activity
             */
            createNewSuperActivity(ac, superActivities, toBeExpendedMap, invertedActivitySuperActivityMap,
                    activityId, activity);
        }
    }
    /**
     * Expand one-to-one connected activities in BFS order.
     * After the while-loop, the original activities are partitioned
     * into equivalence classes, one per super activity.
     */
    Map<ActivityId, SuperActivity> clonedSuperActivities = new HashMap<ActivityId, SuperActivity>();
    while (toBeExpendedMap.size() > 0) {
        clonedSuperActivities.clear();
        clonedSuperActivities.putAll(superActivities);
        for (Entry<ActivityId, SuperActivity> entry : clonedSuperActivities.entrySet()) {
            ActivityId superActivityId = entry.getKey();
            SuperActivity superActivity = entry.getValue();
            /**
             * for the case where the super activity has already been swallowed
             */
            if (superActivities.get(superActivityId) == null) {
                continue;
            }
            /**
             * expand the super activity
             */
            Queue<IActivity> toBeExpended = toBeExpendedMap.get(superActivityId);
            if (toBeExpended == null) {
                /**
                 * Nothing to expand
                 */
                continue;
            }
            IActivity expendingActivity = toBeExpended.poll();
            List<IConnectorDescriptor> outputConnectors = activityOutputMap.get(expendingActivity.getActivityId());
            if (outputConnectors != null) {
                for (IConnectorDescriptor outputConn : outputConnectors) {
                    Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> endPoints =
                            connectorActivityMap.get(outputConn.getConnectorId());
                    IActivity newActivity = endPoints.getRight().getLeft();
                    SuperActivity existingSuperActivity = invertedActivitySuperActivityMap.get(newActivity);
                    if (outputConn.getClass().getName().contains(ONE_TO_ONE_CONNECTOR)) {
                        /**
                         * expand the super activity along a one-to-one outbound connection
                         */
                        if (existingSuperActivity == null) {
                            superActivity.addActivity(newActivity);
                            toBeExpended.add(newActivity);
                            invertedActivitySuperActivityMap.put(newActivity, superActivity);
                        } else {
                            /**
                             * the two activities are already in the same super activity
                             */
                            if (existingSuperActivity == superActivity) {
                                continue;
                            }
                            /**
                             * swallow an existing super activity
                             */
                            swallowExistingSuperActivity(superActivities, toBeExpendedMap,
                                    invertedActivitySuperActivityMap, superActivity, superActivityId,
                                    existingSuperActivity);
                        }
                    } else {
                        if (existingSuperActivity == null) {
                            /**
                             * create a new super activity
                             */
                            createNewSuperActivity(ac, superActivities, toBeExpendedMap,
                                    invertedActivitySuperActivityMap, newActivity.getActivityId(), newActivity);
                        }
                    }
                }
            }
            /**
             * remove the to-be-expanded queue if it is empty
             */
            if (toBeExpended.size() == 0) {
                toBeExpendedMap.remove(superActivityId);
            }
        }
    }
    Map<ConnectorDescriptorId, IConnectorDescriptor> connMap = ac.getConnectorMap();
    Map<ConnectorDescriptorId, RecordDescriptor> connRecordDesc = ac.getConnectorRecordDescriptorMap();
    Map<SuperActivity, Integer> superActivityProducerPort = new HashMap<SuperActivity, Integer>();
    Map<SuperActivity, Integer> superActivityConsumerPort = new HashMap<SuperActivity, Integer>();
    for (Entry<ActivityId, SuperActivity> entry : superActivities.entrySet()) {
        superActivityProducerPort.put(entry.getValue(), 0);
        superActivityConsumerPort.put(entry.getValue(), 0);
    }
    /**
     * create a new activity cluster to replace the old activity cluster
     */
    ActivityCluster newActivityCluster = new ActivityCluster(acg, ac.getId());
    newActivityCluster.setConnectorPolicyAssignmentPolicy(ac.getConnectorPolicyAssignmentPolicy());
    for (Entry<ActivityId, SuperActivity> entry : superActivities.entrySet()) {
        newActivityCluster.addActivity(entry.getValue());
        acg.getActivityMap().put(entry.getKey(), newActivityCluster);
    }
    /**
     * Set up connectors: either inside a super activity or among super activities
     */
    for (Entry<ConnectorDescriptorId, Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>>> entry :
            connectorActivityMap.entrySet()) {
        ConnectorDescriptorId connectorId = entry.getKey();
        Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> endPoints = entry.getValue();
        IActivity producerActivity = endPoints.getLeft().getLeft();
        IActivity consumerActivity = endPoints.getRight().getLeft();
        int producerPort = endPoints.getLeft().getRight();
        int consumerPort = endPoints.getRight().getRight();
        RecordDescriptor recordDescriptor = connRecordDesc.get(connectorId);
        IConnectorDescriptor conn = connMap.get(connectorId);
        if (conn.getClass().getName().contains(ONE_TO_ONE_CONNECTOR)) {
            /**
             * connection edge between inner activities of one super activity
             */
            SuperActivity residingSuperActivity = invertedActivitySuperActivityMap.get(producerActivity);
            residingSuperActivity.connect(conn, producerActivity, producerPort, consumerActivity, consumerPort,
                    recordDescriptor);
        } else {
            /**
             * connection edge between super activities
             */
            SuperActivity producerSuperActivity = invertedActivitySuperActivityMap.get(producerActivity);
            SuperActivity consumerSuperActivity = invertedActivitySuperActivityMap.get(consumerActivity);
            int producerSAPort = superActivityProducerPort.get(producerSuperActivity);
            int consumerSAPort = superActivityConsumerPort.get(consumerSuperActivity);
            newActivityCluster.addConnector(conn);
            newActivityCluster.connect(conn, producerSuperActivity, producerSAPort, consumerSuperActivity,
                    consumerSAPort, recordDescriptor);
            /**
             * bridge the ports
             */
            producerSuperActivity.setClusterOutputIndex(producerSAPort, producerActivity.getActivityId(),
                    producerPort);
            consumerSuperActivity.setClusterInputIndex(consumerSAPort, consumerActivity.getActivityId(),
                    consumerPort);
            acg.getConnectorMap().put(connectorId, newActivityCluster);
            /**
             * increase the port numbers for the producer and consumer
             */
            superActivityProducerPort.put(producerSuperActivity, ++producerSAPort);
            superActivityConsumerPort.put(consumerSuperActivity, ++consumerSAPort);
        }
    }
    /**
     * Set up the roots of the new activity cluster
     */
    for (Entry<ActivityId, SuperActivity> entry : superActivities.entrySet()) {
        List<IConnectorDescriptor> connIds = newActivityCluster.getActivityOutputMap().get(entry.getKey());
        if (connIds == null || connIds.size() == 0) {
            newActivityCluster.addRoot(entry.getValue());
        }
    }
    /**
     * set up the blocked2Blocker mapping, which will be updated in the rewriteInterActivityCluster call
     */
    newActivityCluster.getBlocked2BlockerMap().putAll(ac.getBlocked2BlockerMap());
    /**
     * replace the old activity cluster with the new activity cluster
     */
    acg.getActivityClusterMap().put(ac.getId(), newActivityCluster);
}
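The rewriter classifies connectors by class name (ONE_TO_ONE_CONNECTOR is a string constant defined elsewhere in the class). A sketch of the check and, under the assumption that the constant names OneToOneConnectorDescriptor, a type-based alternative:

    // The rewriter's test, as written above:
    boolean oneToOne = conn.getClass().getName().contains(ONE_TO_ONE_CONNECTOR);
    // Equivalent intent, assuming the constant refers to OneToOneConnectorDescriptor
    // and no subclasses are involved:
    boolean oneToOneByType = conn instanceof OneToOneConnectorDescriptor;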