Search in sources :

Example 6 with DelimitedDataTupleParserFactory

use of org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory in project asterixdb by apache.

the class PushRuntimeTest method scanLimitWrite.

@Test
public void scanLimitWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    // the scanner
    FileSplit[] fileSplits = new FileSplit[1];
    fileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer.tbl");
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
    RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
    FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner, DEFAULT_NODES);
    // the algebricks op.
    StreamLimitRuntimeFactory limit = new StreamLimitRuntimeFactory(new IntegerConstantEvalFactory(2), null, new int[] { 0 }, BinaryIntegerInspectorImpl.FACTORY);
    RecordDescriptor limitDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    String filePath = PATH_ACTUAL + SEPARATOR + "scanLimitWrite.out";
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, limitDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { limit, writer }, new RecordDescriptor[] { limitDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, algebricksOp, 0);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("12", buf.toString());
    outFile.delete();
}
Also used : IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) SinkWriterRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) File(java.io.File) StreamLimitRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.StreamLimitRuntimeFactory) Test(org.junit.Test)

Example 7 with DelimitedDataTupleParserFactory

use of org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory in project asterixdb by apache.

the class PushRuntimeTest method etsAssignScriptWrite.

@Test
public void etsAssignScriptWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    IntegerConstantEvalFactory const1 = new IntegerConstantEvalFactory(400);
    IntegerConstantEvalFactory const2 = new IntegerConstantEvalFactory(3);
    EmptyTupleSourceRuntimeFactory ets = new EmptyTupleSourceRuntimeFactory();
    RecordDescriptor etsDesc = new RecordDescriptor(new ISerializerDeserializer[] {});
    AssignRuntimeFactory assign = new AssignRuntimeFactory(new int[] { 0, 1 }, new IScalarEvaluatorFactory[] { const1, const2 }, new int[] { 0, 1 });
    RecordDescriptor assignDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    IValueParserFactory[] valueParsers = { IntegerParserFactory.INSTANCE, IntegerParserFactory.INSTANCE };
    String osname = System.getProperty("os.name");
    String command;
    if (osname.equals("Linux")) {
        command = "bash target/testscripts/idscript";
    } else if (osname.startsWith("Windows")) {
        command = "target\\testscripts\\idscript.cmd";
    } else {
        // don't know how to test
        return;
    }
    StringStreamingRuntimeFactory script = new StringStreamingRuntimeFactory(command, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE, IntegerPrinterFactory.INSTANCE }, ' ', new DelimitedDataTupleParserFactory(valueParsers, ' '));
    RecordDescriptor scriptDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    String filePath = PATH_ACTUAL + SEPARATOR + "etsAssignScriptWrite.out";
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0, 1 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE, IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, scriptDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 0, 0, new IPushRuntimeFactory[] { ets, assign, script, writer }, new RecordDescriptor[] { etsDesc, assignDesc, scriptDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("400; 3", buf.toString());
    outFile.delete();
}
Also used : IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) AssignRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.AssignRuntimeFactory) StringStreamingRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.StringStreamingRuntimeFactory) SinkWriterRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory) EmptyTupleSourceRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.EmptyTupleSourceRuntimeFactory) JobSpecification(org.apache.hyracks.api.job.JobSpecification) File(java.io.File) Test(org.junit.Test)

Example 8 with DelimitedDataTupleParserFactory

use of org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory in project asterixdb by apache.

the class PushRuntimeTest method scanSortGbySelectWrite.

@Test
public void scanSortGbySelectWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    // the scanner
    FileSplit[] fileSplits = new FileSplit[1];
    fileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer.tbl");
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
    RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
    FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    // the sort (by nation id)
    RecordDescriptor sortDesc = scannerDesc;
    InMemorySortOperatorDescriptor sort = new InMemorySortOperatorDescriptor(spec, new int[] { 3 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, sortDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sort, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    // the group-by
    NestedTupleSourceRuntimeFactory nts = new NestedTupleSourceRuntimeFactory();
    RecordDescriptor ntsDesc = sortDesc;
    AggregateRuntimeFactory agg = new AggregateRuntimeFactory(new IAggregateEvaluatorFactory[] { new TupleCountAggregateFunctionFactory() });
    RecordDescriptor aggDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    AlgebricksPipeline pipeline = new AlgebricksPipeline(new IPushRuntimeFactory[] { nts, agg }, new RecordDescriptor[] { ntsDesc, aggDesc });
    NestedPlansAccumulatingAggregatorFactory npaaf = new NestedPlansAccumulatingAggregatorFactory(new AlgebricksPipeline[] { pipeline }, new int[] { 3 }, new int[] {});
    RecordDescriptor gbyDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor gby = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 3 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, npaaf, gbyDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, gby, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    // the algebricks op.
    IScalarEvaluatorFactory cond = new IntegerEqualsEvalFactory(new IntegerConstantEvalFactory(3), // Canadian customers
    new TupleFieldEvaluatorFactory(0));
    StreamSelectRuntimeFactory select = new StreamSelectRuntimeFactory(cond, new int[] { 1 }, BinaryBooleanInspectorImpl.FACTORY, false, -1, null);
    RecordDescriptor selectDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    String filePath = PATH_ACTUAL + SEPARATOR + "scanSortGbySelectWrite.out";
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, selectDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { select, writer }, new RecordDescriptor[] { selectDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, sort, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sort, 0, gby, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), gby, 0, algebricksOp, 0);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("9", buf.toString());
    outFile.delete();
}
Also used : AggregateRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.AggregateRuntimeFactory) RunningAggregateRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.RunningAggregateRuntimeFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) TupleCountAggregateFunctionFactory(org.apache.hyracks.algebricks.runtime.aggregators.TupleCountAggregateFunctionFactory) NestedTupleSourceRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.NestedTupleSourceRuntimeFactory) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) AlgebricksPipeline(org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline) TupleFieldEvaluatorFactory(org.apache.hyracks.algebricks.runtime.evaluators.TupleFieldEvaluatorFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) InMemorySortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) NestedPlansAccumulatingAggregatorFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.NestedPlansAccumulatingAggregatorFactory) IScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory) StreamSelectRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.StreamSelectRuntimeFactory) SinkWriterRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory) PreclusteredGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor) File(java.io.File) Test(org.junit.Test)

Example 9 with DelimitedDataTupleParserFactory

use of org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory in project asterixdb by apache.

the class AbstractRTreeOperatorTest method insertPipeline.

protected void insertPipeline() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "orders-with-locations-part2.txt") };
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, DoubleParserFactory.INSTANCE, DoubleParserFactory.INSTANCE, DoubleParserFactory.INSTANCE, DoubleParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
    // insert into primary index
    int[] primaryFieldPermutation = { 0, 1, 2, 4, 5, 7, 9, 10, 11, 12 };
    TreeIndexInsertUpdateDeleteOperatorDescriptor primaryInsertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, ordersDesc, primaryFieldPermutation, IndexOperation.INSERT, primaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryInsertOp, NC1_ID);
    // secondary index
    int[] secondaryFieldPermutation = { 9, 10, 11, 12, 0 };
    TreeIndexInsertUpdateDeleteOperatorDescriptor secondaryInsertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, ordersDesc, secondaryFieldPermutation, IndexOperation.INSERT, secondaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryInsertOp, NC1_ID);
    NullSinkOperatorDescriptor nullSink = new NullSinkOperatorDescriptor(spec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, nullSink, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, primaryInsertOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryInsertOp, 0, secondaryInsertOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryInsertOp, 0, nullSink, 0);
    spec.addRoot(nullSink);
    runTest(spec);
}
Also used : NullSinkOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) TreeIndexInsertUpdateDeleteOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification)

Example 10 with DelimitedDataTupleParserFactory

use of org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory in project asterixdb by apache.

the class SleepOperatorDescriptor method jobWithSleepOp.

private JobSpecification jobWithSleepOp() {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(ASTERIX_IDS[0], "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor recordDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    // File scan operator.
    FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), recordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanOp, ASTERIX_IDS[0]);
    // Sleep operator.
    SleepOperatorDescriptor sleepOp = new SleepOperatorDescriptor(spec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sleepOp, ASTERIX_IDS);
    // Sink operator.
    SinkOperatorDescriptor sinkOp = new SinkOperatorDescriptor(spec, 1);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sinkOp, ASTERIX_IDS);
    // Hash-repartitioning connector.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, scanOp, 0, sleepOp, 0);
    // One-to-one connector.
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, sleepOp, 0, sinkOp, 0);
    return spec;
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) SinkOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.SinkOperatorDescriptor) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification)

Aggregations

DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)46 JobSpecification (org.apache.hyracks.api.job.JobSpecification)45 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)45 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)45 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)43 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)41 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)41 FileSplit (org.apache.hyracks.api.io.FileSplit)39 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)39 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)36 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)36 Test (org.junit.Test)34 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)33 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)24 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)24 ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor)23 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)18 MToNBroadcastConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor)18 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)12 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)12