Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
The class PushRuntimeTest, method scanLimitWrite.
@Test
public void scanLimitWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    // the scanner
    FileSplit[] fileSplits = new FileSplit[1];
    fileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID,
            "data" + File.separator + "tpch0.001" + File.separator + "customer.tbl");
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
    RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE };
    FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider,
            new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner, DEFAULT_NODES);
    // the algebricks op.
    StreamLimitRuntimeFactory limit = new StreamLimitRuntimeFactory(new IntegerConstantEvalFactory(2), null,
            new int[] { 0 }, BinaryIntegerInspectorImpl.FACTORY);
    RecordDescriptor limitDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    String filePath = PATH_ACTUAL + SEPARATOR + "scanLimitWrite.out";
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0 },
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE,
            limitDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0,
            new IPushRuntimeFactory[] { limit, writer }, new RecordDescriptor[] { limitDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp,
            new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, algebricksOp, 0);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("12", buf.toString());
    outFile.delete();
}
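The RecordDescriptor handed to the scanner lists one ISerializerDeserializer per field of the delimited file, in field order, and the IValueParserFactory array must line up with it position by position. As a minimal sketch using only the classes already shown above (the two-field schema itself is hypothetical), a scanner over an int|string file would be described like this:

// hypothetical two-field schema: an integer key followed by a UTF-8 string
RecordDescriptor twoFieldDesc = new RecordDescriptor(new ISerializerDeserializer[] {
        IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
IValueParserFactory[] twoFieldParsers = new IValueParserFactory[] {
        IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
// pairs the per-field parsers with the '|' delimiter, as in the scanner above
DelimitedDataTupleParserFactory twoFieldTupleParser =
        new DelimitedDataTupleParserFactory(twoFieldParsers, '|');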
Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
The class PushRuntimeTest, method etsAssignScriptWrite.
@Test
public void etsAssignScriptWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    IntegerConstantEvalFactory const1 = new IntegerConstantEvalFactory(400);
    IntegerConstantEvalFactory const2 = new IntegerConstantEvalFactory(3);
    EmptyTupleSourceRuntimeFactory ets = new EmptyTupleSourceRuntimeFactory();
    RecordDescriptor etsDesc = new RecordDescriptor(new ISerializerDeserializer[] {});
    AssignRuntimeFactory assign = new AssignRuntimeFactory(new int[] { 0, 1 },
            new IScalarEvaluatorFactory[] { const1, const2 }, new int[] { 0, 1 });
    RecordDescriptor assignDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    IValueParserFactory[] valueParsers = { IntegerParserFactory.INSTANCE, IntegerParserFactory.INSTANCE };
    String osname = System.getProperty("os.name");
    String command;
    if (osname.equals("Linux")) {
        command = "bash target/testscripts/idscript";
    } else if (osname.startsWith("Windows")) {
        command = "target\\testscripts\\idscript.cmd";
    } else {
        // don't know how to test
        return;
    }
    StringStreamingRuntimeFactory script = new StringStreamingRuntimeFactory(command,
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE, IntegerPrinterFactory.INSTANCE }, ' ',
            new DelimitedDataTupleParserFactory(valueParsers, ' '));
    RecordDescriptor scriptDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    String filePath = PATH_ACTUAL + SEPARATOR + "etsAssignScriptWrite.out";
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0, 1 },
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE, IntegerPrinterFactory.INSTANCE }, outFile,
            PrinterBasedWriterFactory.INSTANCE, scriptDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 0, 0,
            new IPushRuntimeFactory[] { ets, assign, script, writer },
            new RecordDescriptor[] { etsDesc, assignDesc, scriptDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp,
            new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("400; 3", buf.toString());
    outFile.delete();
}
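Each of these tests compares the file produced by SinkWriterRuntimeFactory against a string literal through a readFileToString helper that is not part of this excerpt. A minimal sketch of such a helper, assuming it simply appends every trimmed line of the file to the supplied buffer (the real PushRuntimeTest helper may differ in detail), could look like this:

// hypothetical helper; needs java.io.BufferedReader and java.io.FileReader
protected static void readFileToString(File file, StringBuilder buf) throws Exception {
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        String line;
        while ((line = reader.readLine()) != null) {
            buf.append(line.trim());
        }
    }
}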
Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
The class ActivityClusterGraphRewriter, method rewriteIntraActivityCluster.
/**
 * Rewrite an activity cluster internally.
 *
 * @param ac
 *            the activity cluster to be rewritten
 * @param invertedActivitySuperActivityMap
 *            the map from each activity to the super activity that contains it; updated in place
 */
private void rewriteIntraActivityCluster(ActivityCluster ac,
        Map<IActivity, SuperActivity> invertedActivitySuperActivityMap) {
    Map<ActivityId, IActivity> activities = ac.getActivityMap();
    Map<ActivityId, List<IConnectorDescriptor>> activityInputMap = ac.getActivityInputMap();
    Map<ActivityId, List<IConnectorDescriptor>> activityOutputMap = ac.getActivityOutputMap();
    Map<ConnectorDescriptorId, Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>>> connectorActivityMap =
            ac.getConnectorActivityMap();
    ActivityClusterGraph acg = ac.getActivityClusterGraph();
    Map<ActivityId, IActivity> startActivities = new HashMap<ActivityId, IActivity>();
    Map<ActivityId, SuperActivity> superActivities = new HashMap<ActivityId, SuperActivity>();
    Map<ActivityId, Queue<IActivity>> toBeExpendedMap = new HashMap<ActivityId, Queue<IActivity>>();
    /**
     * Build the initial super activities
     */
    for (Entry<ActivityId, IActivity> entry : activities.entrySet()) {
        ActivityId activityId = entry.getKey();
        IActivity activity = entry.getValue();
        if (activityInputMap.get(activityId) == null) {
            startActivities.put(activityId, activity);
            /**
             * use the start activity's id as the id of the super activity
             */
            createNewSuperActivity(ac, superActivities, toBeExpendedMap, invertedActivitySuperActivityMap,
                    activityId, activity);
        }
    }
    /**
     * Expand the one-to-one connected activities in BFS order. After the while-loop,
     * the original activities are partitioned into equivalence classes, one per super activity.
     */
    Map<ActivityId, SuperActivity> clonedSuperActivities = new HashMap<ActivityId, SuperActivity>();
    while (toBeExpendedMap.size() > 0) {
        clonedSuperActivities.clear();
        clonedSuperActivities.putAll(superActivities);
        for (Entry<ActivityId, SuperActivity> entry : clonedSuperActivities.entrySet()) {
            ActivityId superActivityId = entry.getKey();
            SuperActivity superActivity = entry.getValue();
            /**
             * for the case where the super activity has already been swallowed
             */
            if (superActivities.get(superActivityId) == null) {
                continue;
            }
            /**
             * expand the super activity
             */
            Queue<IActivity> toBeExpended = toBeExpendedMap.get(superActivityId);
            if (toBeExpended == null) {
                /**
                 * Nothing to expand
                 */
                continue;
            }
            IActivity expendingActivity = toBeExpended.poll();
            List<IConnectorDescriptor> outputConnectors = activityOutputMap.get(expendingActivity.getActivityId());
            if (outputConnectors != null) {
                for (IConnectorDescriptor outputConn : outputConnectors) {
                    Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> endPoints =
                            connectorActivityMap.get(outputConn.getConnectorId());
                    IActivity newActivity = endPoints.getRight().getLeft();
                    SuperActivity existingSuperActivity = invertedActivitySuperActivityMap.get(newActivity);
                    if (outputConn.getClass().getName().contains(ONE_TO_ONE_CONNECTOR)) {
                        /**
                         * expand the super activity along a one-to-one outbound connection
                         */
                        if (existingSuperActivity == null) {
                            superActivity.addActivity(newActivity);
                            toBeExpended.add(newActivity);
                            invertedActivitySuperActivityMap.put(newActivity, superActivity);
                        } else {
                            /**
                             * the two activities are already in the same super activity
                             */
                            if (existingSuperActivity == superActivity) {
                                continue;
                            }
                            /**
                             * swallow an existing super activity
                             */
                            swallowExistingSuperActivity(superActivities, toBeExpendedMap,
                                    invertedActivitySuperActivityMap, superActivity, superActivityId,
                                    existingSuperActivity);
                        }
                    } else {
                        if (existingSuperActivity == null) {
                            /**
                             * create a new super activity
                             */
                            createNewSuperActivity(ac, superActivities, toBeExpendedMap,
                                    invertedActivitySuperActivityMap, newActivity.getActivityId(), newActivity);
                        }
                    }
                }
            }
            /**
             * remove the to-be-expanded queue if it is empty
             */
            if (toBeExpended.size() == 0) {
                toBeExpendedMap.remove(superActivityId);
            }
        }
    }
    Map<ConnectorDescriptorId, IConnectorDescriptor> connMap = ac.getConnectorMap();
    Map<ConnectorDescriptorId, RecordDescriptor> connRecordDesc = ac.getConnectorRecordDescriptorMap();
    Map<SuperActivity, Integer> superActivityProducerPort = new HashMap<SuperActivity, Integer>();
    Map<SuperActivity, Integer> superActivityConsumerPort = new HashMap<SuperActivity, Integer>();
    for (Entry<ActivityId, SuperActivity> entry : superActivities.entrySet()) {
        superActivityProducerPort.put(entry.getValue(), 0);
        superActivityConsumerPort.put(entry.getValue(), 0);
    }
    /**
     * create a new activity cluster to replace the old activity cluster
     */
    ActivityCluster newActivityCluster = new ActivityCluster(acg, ac.getId());
    newActivityCluster.setConnectorPolicyAssignmentPolicy(ac.getConnectorPolicyAssignmentPolicy());
    for (Entry<ActivityId, SuperActivity> entry : superActivities.entrySet()) {
        newActivityCluster.addActivity(entry.getValue());
        acg.getActivityMap().put(entry.getKey(), newActivityCluster);
    }
    /**
     * Set up connectors: either inside a super activity or among super activities
     */
    for (Entry<ConnectorDescriptorId, Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>>> entry : connectorActivityMap
            .entrySet()) {
        ConnectorDescriptorId connectorId = entry.getKey();
        Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> endPoints = entry.getValue();
        IActivity producerActivity = endPoints.getLeft().getLeft();
        IActivity consumerActivity = endPoints.getRight().getLeft();
        int producerPort = endPoints.getLeft().getRight();
        int consumerPort = endPoints.getRight().getRight();
        RecordDescriptor recordDescriptor = connRecordDesc.get(connectorId);
        IConnectorDescriptor conn = connMap.get(connectorId);
        if (conn.getClass().getName().contains(ONE_TO_ONE_CONNECTOR)) {
            /**
             * connection edge between inner activities
             */
            SuperActivity residingSuperActivity = invertedActivitySuperActivityMap.get(producerActivity);
            residingSuperActivity.connect(conn, producerActivity, producerPort, consumerActivity, consumerPort,
                    recordDescriptor);
        } else {
            /**
             * connection edge between super activities
             */
            SuperActivity producerSuperActivity = invertedActivitySuperActivityMap.get(producerActivity);
            SuperActivity consumerSuperActivity = invertedActivitySuperActivityMap.get(consumerActivity);
            int producerSAPort = superActivityProducerPort.get(producerSuperActivity);
            int consumerSAPort = superActivityConsumerPort.get(consumerSuperActivity);
            newActivityCluster.addConnector(conn);
            newActivityCluster.connect(conn, producerSuperActivity, producerSAPort, consumerSuperActivity,
                    consumerSAPort, recordDescriptor);
            /**
             * bridge the ports
             */
            producerSuperActivity.setClusterOutputIndex(producerSAPort, producerActivity.getActivityId(),
                    producerPort);
            consumerSuperActivity.setClusterInputIndex(consumerSAPort, consumerActivity.getActivityId(),
                    consumerPort);
            acg.getConnectorMap().put(connectorId, newActivityCluster);
            /**
             * increase the port numbers for the producer and consumer
             */
            superActivityProducerPort.put(producerSuperActivity, ++producerSAPort);
            superActivityConsumerPort.put(consumerSuperActivity, ++consumerSAPort);
        }
    }
    /**
     * Set up the roots of the new activity cluster
     */
    for (Entry<ActivityId, SuperActivity> entry : superActivities.entrySet()) {
        List<IConnectorDescriptor> connIds = newActivityCluster.getActivityOutputMap().get(entry.getKey());
        if (connIds == null || connIds.size() == 0) {
            newActivityCluster.addRoot(entry.getValue());
        }
    }
    /**
     * Set up the blocked2Blocker mapping, which will be updated in the rewriteInterActivityCluster call
     */
    newActivityCluster.getBlocked2BlockerMap().putAll(ac.getBlocked2BlockerMap());
    /**
     * Replace the old activity cluster with the new activity cluster
     */
    acg.getActivityClusterMap().put(ac.getId(), newActivityCluster);
}
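The heart of the method is the BFS expansion loop: starting from the activities that have no input, it keeps absorbing any activity reachable over a one-to-one connector into the same super activity, so the original activities end up partitioned into equivalence classes. The following self-contained sketch (plain java.util collections and hypothetical String node names, not the Hyracks API, and ignoring the swallowing of already-created super activities) illustrates that grouping idea in isolation:

// Hypothetical mini-graph: these edges play the role of ONE_TO_ONE_CONNECTOR edges.
Map<String, List<String>> oneToOneEdges = new HashMap<>();
oneToOneEdges.put("scan", List.of("select"));
oneToOneEdges.put("select", List.of("write"));

List<String> startNodes = List.of("scan", "hashJoin");  // nodes without one-to-one inputs
Map<String, String> nodeToGroup = new HashMap<>();       // analogue of invertedActivitySuperActivityMap

for (String start : startNodes) {
    Queue<String> toBeExpanded = new LinkedList<>();
    toBeExpanded.add(start);
    nodeToGroup.put(start, start);                       // each group is named after its start node
    while (!toBeExpanded.isEmpty()) {
        String current = toBeExpanded.poll();
        for (String next : oneToOneEdges.getOrDefault(current, List.of())) {
            if (!nodeToGroup.containsKey(next)) {        // absorb along the one-to-one edge
                nodeToGroup.put(next, start);
                toBeExpanded.add(next);
            }
        }
    }
}
// nodeToGroup is now {scan=scan, select=scan, write=scan, hashJoin=hashJoin}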
Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
The class PushRuntimeTest, method etsUnnestWrite.
@Test
public void etsUnnestWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    EmptyTupleSourceRuntimeFactory ets = new EmptyTupleSourceRuntimeFactory();
    RecordDescriptor etsDesc = new RecordDescriptor(new ISerializerDeserializer[] {});
    IUnnestingEvaluatorFactory aggregFactory = new IntArrayUnnester(new int[] { 100, 200, 300 });
    UnnestRuntimeFactory unnest = new UnnestRuntimeFactory(0, aggregFactory, new int[] { 0 }, false, null);
    RecordDescriptor unnestDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    String filePath = PATH_ACTUAL + SEPARATOR + "etsUnnestWrite.out";
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0 },
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE,
            unnestDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 0, 0,
            new IPushRuntimeFactory[] { ets, unnest, writer },
            new RecordDescriptor[] { etsDesc, unnestDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp,
            new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("100200300", buf.toString());
    outFile.delete();
}
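In each of these pipelines the RecordDescriptor array passed to AlgebricksMetaOperatorDescriptor is aligned position by position with the IPushRuntimeFactory array: the i-th descriptor describes the output of the i-th runtime, and the entry for the terminal SinkWriterRuntimeFactory is null since the sink emits no frames; the two leading integers are the operator's input and output arity. Spelled out for the call above:

AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec,
        0, 0,                          // no inputs, no outputs: the whole pipeline lives inside this operator
        new IPushRuntimeFactory[] { ets,     unnest,     writer },
        new RecordDescriptor[]    { etsDesc, unnestDesc, null }); // output of ets, output of unnest, null for the sink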
Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
The class PushRuntimeTest, method scanSortGbySelectWrite.
@Test
public void scanSortGbySelectWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    // the scanner
    FileSplit[] fileSplits = new FileSplit[1];
    fileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID,
            "data" + File.separator + "tpch0.001" + File.separator + "customer.tbl");
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
    RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE };
    FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider,
            new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner,
            new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    // the sort (by nation id)
    RecordDescriptor sortDesc = scannerDesc;
    InMemorySortOperatorDescriptor sort = new InMemorySortOperatorDescriptor(spec, new int[] { 3 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            sortDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sort,
            new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    // the group-by
    NestedTupleSourceRuntimeFactory nts = new NestedTupleSourceRuntimeFactory();
    RecordDescriptor ntsDesc = sortDesc;
    AggregateRuntimeFactory agg = new AggregateRuntimeFactory(
            new IAggregateEvaluatorFactory[] { new TupleCountAggregateFunctionFactory() });
    RecordDescriptor aggDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    AlgebricksPipeline pipeline = new AlgebricksPipeline(new IPushRuntimeFactory[] { nts, agg },
            new RecordDescriptor[] { ntsDesc, aggDesc });
    NestedPlansAccumulatingAggregatorFactory npaaf = new NestedPlansAccumulatingAggregatorFactory(
            new AlgebricksPipeline[] { pipeline }, new int[] { 3 }, new int[] {});
    RecordDescriptor gbyDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor gby = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 3 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            npaaf, gbyDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, gby,
            new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    // the algebricks op: keep only the groups with nation id 3 (Canadian customers) and project the count field
    IScalarEvaluatorFactory cond = new IntegerEqualsEvalFactory(new IntegerConstantEvalFactory(3),
            new TupleFieldEvaluatorFactory(0));
    StreamSelectRuntimeFactory select = new StreamSelectRuntimeFactory(cond, new int[] { 1 },
            BinaryBooleanInspectorImpl.FACTORY, false, -1, null);
    RecordDescriptor selectDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    String filePath = PATH_ACTUAL + SEPARATOR + "scanSortGbySelectWrite.out";
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0 },
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE,
            selectDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0,
            new IPushRuntimeFactory[] { select, writer }, new RecordDescriptor[] { selectDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp,
            new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, sort, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sort, 0, gby, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), gby, 0, algebricksOp, 0);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("9", buf.toString());
    outFile.delete();
}
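All four tests above end with the same run-and-verify sequence. A hedged sketch of a helper that would factor it out (the name runAndCompare is hypothetical; the calls are exactly the ones used in the tests) might look like this:

// hypothetical convenience method, not part of PushRuntimeTest
private void runAndCompare(JobSpecification spec, File outFile, String expected) throws Exception {
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals(expected, buf.toString());
    outFile.delete();
}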