Search in sources :

Example 6 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class CountOfCountsTest method countOfCountsMultiNC.

@Test
public void countOfCountsMultiNC() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] splits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
    RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    InMemorySortOperatorDescriptor sorter = new InMemorySortOperatorDescriptor(spec, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
    RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
    InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC1_ID, NC2_ID);
    RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc3);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, sorter, 0);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, sorter, 0, group, 0);
    IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn3, group, 0, sorter2, 0);
    IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn4, sorter2, 0, group2, 0);
    IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(conn5, group2, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) CountFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) InMemorySortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) PreclusteredGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor) Test(org.junit.Test)

Example 7 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class CountOfCountsTest method countOfCountsExternalSortMultiNC.

@Test
public void countOfCountsExternalSortMultiNC() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] splits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
    RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 3, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
    RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
    InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC1_ID, NC2_ID);
    RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc3);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, sorter, 0);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, sorter, 0, group, 0);
    IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn3, group, 0, sorter2, 0);
    IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn4, sorter2, 0, group2, 0);
    IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(conn5, group2, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) CountFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) InMemorySortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) PreclusteredGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor) Test(org.junit.Test)

Example 8 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class AggregationTest method singleKeyAvgExtGroupTest.

@Test
public void singleKeyAvgExtGroupTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
    int[] keyFields = new int[] { 0 };
    int frameLimits = 5;
    int tableSize = 8;
    long fileSize = frameLimits * spec.getFrameSize();
    ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize, keyFields, frameLimits, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new CountFieldAggregatorFactory(false), new AvgFieldGroupAggregatorFactory(1, false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false), new AvgFieldMergeAggregatorFactory(3, false) }), outputRec, outputRec, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, grouper, 0);
    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgExtGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : UTF8StringNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) AvgFieldMergeAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.AvgFieldMergeAggregatorFactory) ExternalGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) HashSpillableTableFactory(org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) CountFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) AvgFieldGroupAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.AvgFieldGroupAggregatorFactory) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IBinaryHashFunctionFamily(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily) Test(org.junit.Test)

Example 9 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class RebalanceUtil method populateDataToRebalanceTarget.

// Populates the data from the source dataset to the rebalance target dataset.
private static void populateDataToRebalanceTarget(Dataset source, Dataset target, MetadataProvider metadataProvider, IHyracksClientConnection hcc) throws Exception {
    JobSpecification spec = new JobSpecification();
    JobId jobId = JobIdFactory.generateJobId();
    JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(jobId, true);
    spec.setJobletEventListenerFactory(jobEventListenerFactory);
    // The pipeline starter.
    IOperatorDescriptor starter = DatasetUtil.createDummyKeyProviderOp(spec, source, metadataProvider);
    // Creates primary index scan op.
    IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, source, jobId);
    // Creates secondary BTree upsert op.
    IOperatorDescriptor upsertOp = createPrimaryIndexUpsertOp(spec, metadataProvider, source, target);
    // The final commit operator.
    IOperatorDescriptor commitOp = createUpsertCommitOp(spec, metadataProvider, jobId, target);
    // Connects empty-tuple-source and scan.
    spec.connect(new OneToOneConnectorDescriptor(spec), starter, 0, primaryScanOp, 0);
    // Connects scan and upsert.
    int numKeys = target.getPrimaryKeys().size();
    int[] keys = IntStream.range(0, numKeys).toArray();
    IConnectorDescriptor connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keys, target.getPrimaryHashFunctionFactories(metadataProvider)));
    spec.connect(connectorDescriptor, primaryScanOp, 0, upsertOp, 0);
    // Connects upsert and sink.
    spec.connect(new OneToOneConnectorDescriptor(spec), upsertOp, 0, commitOp, 0);
    // Executes the job.
    JobUtils.runJob(hcc, spec, true);
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) JobEventListenerFactory(org.apache.asterix.runtime.job.listener.JobEventListenerFactory) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) JobId(org.apache.asterix.common.transactions.JobId) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)

Example 10 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class ActivityClusterGraphRewriter method rewriteIntraActivityCluster.

/**
     * rewrite an activity cluster internally
     *
     * @param ac
     *            the activity cluster to be rewritten
     */
private void rewriteIntraActivityCluster(ActivityCluster ac, Map<IActivity, SuperActivity> invertedActivitySuperActivityMap) {
    Map<ActivityId, IActivity> activities = ac.getActivityMap();
    Map<ActivityId, List<IConnectorDescriptor>> activityInputMap = ac.getActivityInputMap();
    Map<ActivityId, List<IConnectorDescriptor>> activityOutputMap = ac.getActivityOutputMap();
    Map<ConnectorDescriptorId, Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>>> connectorActivityMap = ac.getConnectorActivityMap();
    ActivityClusterGraph acg = ac.getActivityClusterGraph();
    Map<ActivityId, IActivity> startActivities = new HashMap<ActivityId, IActivity>();
    Map<ActivityId, SuperActivity> superActivities = new HashMap<ActivityId, SuperActivity>();
    Map<ActivityId, Queue<IActivity>> toBeExpendedMap = new HashMap<ActivityId, Queue<IActivity>>();
    /**
         * Build the initial super activities
         */
    for (Entry<ActivityId, IActivity> entry : activities.entrySet()) {
        ActivityId activityId = entry.getKey();
        IActivity activity = entry.getValue();
        if (activityInputMap.get(activityId) == null) {
            startActivities.put(activityId, activity);
            /**
                 * use the start activity's id as the id of the super activity
                 */
            createNewSuperActivity(ac, superActivities, toBeExpendedMap, invertedActivitySuperActivityMap, activityId, activity);
        }
    }
    /**
         * expand one-to-one connected activity cluster by the BFS order.
         * after the while-loop, the original activities are partitioned
         * into equivalent classes, one-per-super-activity.
         */
    Map<ActivityId, SuperActivity> clonedSuperActivities = new HashMap<ActivityId, SuperActivity>();
    while (toBeExpendedMap.size() > 0) {
        clonedSuperActivities.clear();
        clonedSuperActivities.putAll(superActivities);
        for (Entry<ActivityId, SuperActivity> entry : clonedSuperActivities.entrySet()) {
            ActivityId superActivityId = entry.getKey();
            SuperActivity superActivity = entry.getValue();
            /**
                 * for the case where the super activity has already been swallowed
                 */
            if (superActivities.get(superActivityId) == null) {
                continue;
            }
            /**
                 * expend the super activity
                 */
            Queue<IActivity> toBeExpended = toBeExpendedMap.get(superActivityId);
            if (toBeExpended == null) {
                /**
                     * Nothing to expand
                     */
                continue;
            }
            IActivity expendingActivity = toBeExpended.poll();
            List<IConnectorDescriptor> outputConnectors = activityOutputMap.get(expendingActivity.getActivityId());
            if (outputConnectors != null) {
                for (IConnectorDescriptor outputConn : outputConnectors) {
                    Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> endPoints = connectorActivityMap.get(outputConn.getConnectorId());
                    IActivity newActivity = endPoints.getRight().getLeft();
                    SuperActivity existingSuperActivity = invertedActivitySuperActivityMap.get(newActivity);
                    if (outputConn.getClass().getName().contains(ONE_TO_ONE_CONNECTOR)) {
                        /**
                             * expend the super activity cluster on an one-to-one out-bound connection
                             */
                        if (existingSuperActivity == null) {
                            superActivity.addActivity(newActivity);
                            toBeExpended.add(newActivity);
                            invertedActivitySuperActivityMap.put(newActivity, superActivity);
                        } else {
                            /**
                                 * the two activities already in the same super activity
                                 */
                            if (existingSuperActivity == superActivity) {
                                continue;
                            }
                            /**
                                 * swallow an existing super activity
                                 */
                            swallowExistingSuperActivity(superActivities, toBeExpendedMap, invertedActivitySuperActivityMap, superActivity, superActivityId, existingSuperActivity);
                        }
                    } else {
                        if (existingSuperActivity == null) {
                            /**
                                 * create new activity
                                 */
                            createNewSuperActivity(ac, superActivities, toBeExpendedMap, invertedActivitySuperActivityMap, newActivity.getActivityId(), newActivity);
                        }
                    }
                }
            }
            /**
                 * remove the to-be-expended queue if it is empty
                 */
            if (toBeExpended.size() == 0) {
                toBeExpendedMap.remove(superActivityId);
            }
        }
    }
    Map<ConnectorDescriptorId, IConnectorDescriptor> connMap = ac.getConnectorMap();
    Map<ConnectorDescriptorId, RecordDescriptor> connRecordDesc = ac.getConnectorRecordDescriptorMap();
    Map<SuperActivity, Integer> superActivityProducerPort = new HashMap<SuperActivity, Integer>();
    Map<SuperActivity, Integer> superActivityConsumerPort = new HashMap<SuperActivity, Integer>();
    for (Entry<ActivityId, SuperActivity> entry : superActivities.entrySet()) {
        superActivityProducerPort.put(entry.getValue(), 0);
        superActivityConsumerPort.put(entry.getValue(), 0);
    }
    /**
         * create a new activity cluster to replace the old activity cluster
         */
    ActivityCluster newActivityCluster = new ActivityCluster(acg, ac.getId());
    newActivityCluster.setConnectorPolicyAssignmentPolicy(ac.getConnectorPolicyAssignmentPolicy());
    for (Entry<ActivityId, SuperActivity> entry : superActivities.entrySet()) {
        newActivityCluster.addActivity(entry.getValue());
        acg.getActivityMap().put(entry.getKey(), newActivityCluster);
    }
    /**
         * Setup connectors: either inside a super activity or among super activities
         */
    for (Entry<ConnectorDescriptorId, Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>>> entry : connectorActivityMap.entrySet()) {
        ConnectorDescriptorId connectorId = entry.getKey();
        Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> endPoints = entry.getValue();
        IActivity producerActivity = endPoints.getLeft().getLeft();
        IActivity consumerActivity = endPoints.getRight().getLeft();
        int producerPort = endPoints.getLeft().getRight();
        int consumerPort = endPoints.getRight().getRight();
        RecordDescriptor recordDescriptor = connRecordDesc.get(connectorId);
        IConnectorDescriptor conn = connMap.get(connectorId);
        if (conn.getClass().getName().contains(ONE_TO_ONE_CONNECTOR)) {
            /**
                 * connection edge between inner activities
                 */
            SuperActivity residingSuperActivity = invertedActivitySuperActivityMap.get(producerActivity);
            residingSuperActivity.connect(conn, producerActivity, producerPort, consumerActivity, consumerPort, recordDescriptor);
        } else {
            /**
                 * connection edge between super activities
                 */
            SuperActivity producerSuperActivity = invertedActivitySuperActivityMap.get(producerActivity);
            SuperActivity consumerSuperActivity = invertedActivitySuperActivityMap.get(consumerActivity);
            int producerSAPort = superActivityProducerPort.get(producerSuperActivity);
            int consumerSAPort = superActivityConsumerPort.get(consumerSuperActivity);
            newActivityCluster.addConnector(conn);
            newActivityCluster.connect(conn, producerSuperActivity, producerSAPort, consumerSuperActivity, consumerSAPort, recordDescriptor);
            /**
                 * bridge the port
                 */
            producerSuperActivity.setClusterOutputIndex(producerSAPort, producerActivity.getActivityId(), producerPort);
            consumerSuperActivity.setClusterInputIndex(consumerSAPort, consumerActivity.getActivityId(), consumerPort);
            acg.getConnectorMap().put(connectorId, newActivityCluster);
            /**
                 * increasing the port number for the producer and consumer
                 */
            superActivityProducerPort.put(producerSuperActivity, ++producerSAPort);
            superActivityConsumerPort.put(consumerSuperActivity, ++consumerSAPort);
        }
    }
    /**
         * Set up the roots of the new activity cluster
         */
    for (Entry<ActivityId, SuperActivity> entry : superActivities.entrySet()) {
        List<IConnectorDescriptor> connIds = newActivityCluster.getActivityOutputMap().get(entry.getKey());
        if (connIds == null || connIds.size() == 0) {
            newActivityCluster.addRoot(entry.getValue());
        }
    }
    /**
         * set up the blocked2Blocker mapping, which will be updated in the rewriteInterActivityCluster call
         */
    newActivityCluster.getBlocked2BlockerMap().putAll(ac.getBlocked2BlockerMap());
    /**
         * replace the old activity cluster with the new activity cluster
         */
    acg.getActivityClusterMap().put(ac.getId(), newActivityCluster);
}
Also used : HashMap(java.util.HashMap) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) IActivity(org.apache.hyracks.api.dataflow.IActivity) SuperActivity(org.apache.hyracks.api.rewriter.runtime.SuperActivity) List(java.util.List) LinkedList(java.util.LinkedList) Queue(java.util.Queue) Pair(org.apache.commons.lang3.tuple.Pair) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) ActivityCluster(org.apache.hyracks.api.job.ActivityCluster) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph)

Aggregations

IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)72 JobSpecification (org.apache.hyracks.api.job.JobSpecification)45 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)40 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)40 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)39 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)37 Test (org.junit.Test)35 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)34 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)33 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)32 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)31 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)27 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)25 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)24 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)22 FileSplit (org.apache.hyracks.api.io.FileSplit)21 MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)20 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)19 IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory)19 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)18