Examples with UTF8StringSerializerDeserializer - org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer

Example 81 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

the class PushRuntimeTest method scanMicroSortGbySelectWrite.

@Test
public void scanMicroSortGbySelectWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    // the scanner
    FileSplit[] fileSplits = new FileSplit[1];
    fileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer.tbl");
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
    RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
    FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    // the sort (by nation id)
    RecordDescriptor sortDesc = scannerDesc;
    InMemorySortRuntimeFactory sort = new InMemorySortRuntimeFactory(new int[] { 3 }, null, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, null);
    // the group-by
    NestedTupleSourceRuntimeFactory nts = new NestedTupleSourceRuntimeFactory();
    RecordDescriptor ntsDesc = sortDesc;
    AggregateRuntimeFactory agg = new AggregateRuntimeFactory(new IAggregateEvaluatorFactory[] { new TupleCountAggregateFunctionFactory() });
    RecordDescriptor aggDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    AlgebricksPipeline pipeline = new AlgebricksPipeline(new IPushRuntimeFactory[] { nts, agg }, new RecordDescriptor[] { ntsDesc, aggDesc });
    NestedPlansAccumulatingAggregatorFactory npaaf = new NestedPlansAccumulatingAggregatorFactory(new AlgebricksPipeline[] { pipeline }, new int[] { 3 }, new int[] {});
    RecordDescriptor gbyDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    MicroPreClusteredGroupRuntimeFactory gby = new MicroPreClusteredGroupRuntimeFactory(new int[] { 3 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, npaaf, sortDesc, gbyDesc, null);
    // the algebricks op.
    IScalarEvaluatorFactory cond = new IntegerEqualsEvalFactory(new IntegerConstantEvalFactory(3), // Canadian customers
    new TupleFieldEvaluatorFactory(0));
    StreamSelectRuntimeFactory select = new StreamSelectRuntimeFactory(cond, new int[] { 1 }, BinaryBooleanInspectorImpl.FACTORY, false, -1, null);
    RecordDescriptor selectDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    String filePath = PATH_ACTUAL + SEPARATOR + "scanSortGbySelectWrite.out";
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, selectDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { sort, gby, select, writer }, new RecordDescriptor[] { sortDesc, gbyDesc, selectDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, algebricksOp, 0);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("9", buf.toString());
    outFile.delete();
}

Also used : InMemorySortRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.sort.InMemorySortRuntimeFactory) AggregateRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.AggregateRuntimeFactory) RunningAggregateRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.RunningAggregateRuntimeFactory) MicroPreClusteredGroupRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.group.MicroPreClusteredGroupRuntimeFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) TupleCountAggregateFunctionFactory(org.apache.hyracks.algebricks.runtime.aggregators.TupleCountAggregateFunctionFactory) NestedTupleSourceRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.NestedTupleSourceRuntimeFactory) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) AlgebricksPipeline(org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline) TupleFieldEvaluatorFactory(org.apache.hyracks.algebricks.runtime.evaluators.TupleFieldEvaluatorFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) NestedPlansAccumulatingAggregatorFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.NestedPlansAccumulatingAggregatorFactory) IScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory) StreamSelectRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.StreamSelectRuntimeFactory) SinkWriterRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory) File(java.io.File) Test(org.junit.Test)

Example 82 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

the class PushRuntimeTest method scanAggregateWrite.

@Test
public void scanAggregateWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    // the scanner
    FileSplit[] fileSplits = new FileSplit[1];
    fileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part1.tbl");
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
    RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
    FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    // the algebricks op.
    AggregateRuntimeFactory agg = new AggregateRuntimeFactory(new IAggregateEvaluatorFactory[] { new TupleCountAggregateFunctionFactory() });
    RecordDescriptor aggDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    String filePath = PATH_ACTUAL + SEPARATOR + "scanAggregateWrite.out";
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, aggDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { agg, writer }, new RecordDescriptor[] { aggDesc, null });
    PartitionConstraintHelper.addPartitionCountConstraint(spec, algebricksOp, 1);
    spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, algebricksOp, 0);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("75", buf.toString());
    outFile.delete();
}

Also used : AggregateRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.AggregateRuntimeFactory) RunningAggregateRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.RunningAggregateRuntimeFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) TupleCountAggregateFunctionFactory(org.apache.hyracks.algebricks.runtime.aggregators.TupleCountAggregateFunctionFactory) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) SinkWriterRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) File(java.io.File) Test(org.junit.Test)

Example 83 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

the class PushRuntimeTest method scanReplicateWrite.

@Test
public void scanReplicateWrite() throws Exception {
    final int outputArity = 2;
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    String inputFileName = "data" + File.separator + "tpch0.001" + File.separator + "customer.tbl";
    FileSplit[] inputSplits = new FileSplit[] { new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, inputFileName) };
    DelimitedDataTupleParserFactory stringParser = new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, '');
    RecordDescriptor stringRec = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(spec, new ConstantFileSplitProvider(inputSplits), stringParser, stringRec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    ReplicateOperatorDescriptor replicateOp = new ReplicateOperatorDescriptor(spec, stringRec, outputArity);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, replicateOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    IOperatorDescriptor[] outputOp = new IOperatorDescriptor[outputArity];
    File[] outputFile = new File[outputArity];
    for (int i = 0; i < outputArity; i++) {
        FileSplit fileSplit = createFile(AlgebricksHyracksIntegrationUtil.nc1);
        outputFile[i] = fileSplit.getFile(AlgebricksHyracksIntegrationUtil.nc1.getIoManager());
        outputOp[i] = new LineFileWriteOperatorDescriptor(spec, new FileSplit[] { fileSplit });
        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, outputOp[i], new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    }
    spec.connect(new OneToOneConnectorDescriptor(spec), scanOp, 0, replicateOp, 0);
    for (int i = 0; i < outputArity; i++) {
        spec.connect(new OneToOneConnectorDescriptor(spec), replicateOp, i, outputOp[i], 0);
    }
    for (int i = 0; i < outputArity; i++) {
        spec.addRoot(outputOp[i]);
    }
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    for (int i = 0; i < outputArity; i++) {
        compareFiles("data" + File.separator + "device0" + File.separator + inputFileName, outputFile[i].getAbsolutePath());
    }
}

Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) LineFileWriteOperatorDescriptor(org.apache.hyracks.dataflow.std.file.LineFileWriteOperatorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) ReplicateOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ReplicateOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) File(java.io.File) Test(org.junit.Test)

Example 84 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

the class AbstractRunGeneratorTest method prepareData.

static void prepareData(IHyracksTaskContext ctx, List<IFrame> frameList, int minDataSize, int minRecordSize, int maxRecordSize, Map<Integer, String> specialData, Map<Integer, String> keyValuePair) throws HyracksDataException {
    ArrayTupleBuilder tb = new ArrayTupleBuilder(RecordDesc.getFieldCount());
    FrameTupleAppender appender = new FrameTupleAppender();
    int datasize = 0;
    if (specialData != null) {
        for (Map.Entry<Integer, String> entry : specialData.entrySet()) {
            tb.reset();
            tb.addField(IntegerSerializerDeserializer.INSTANCE, entry.getKey());
            tb.addField(new UTF8StringSerializerDeserializer(), entry.getValue());
            VSizeFrame frame = new VSizeFrame(ctx, FrameHelper.calcAlignedFrameSizeToStore(tb.getFieldEndOffsets().length, tb.getSize(), ctx.getInitialFrameSize()));
            appender.reset(frame, true);
            assertTrue(appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize()));
            frameList.add(frame);
            datasize += frame.getFrameSize();
        }
        keyValuePair.putAll(specialData);
    }
    VSizeFrame frame = new VSizeFrame(ctx, ctx.getInitialFrameSize());
    appender.reset(frame, true);
    while (datasize < minDataSize) {
        tb.reset();
        int key = GRandom.nextInt(minDataSize + 1);
        if (!keyValuePair.containsKey(key)) {
            String value = generateRandomRecord(minRecordSize, maxRecordSize);
            tb.addField(IntegerSerializerDeserializer.INSTANCE, key);
            tb.addField(new UTF8StringSerializerDeserializer(), value);
            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                frameList.add(frame);
                datasize += frame.getFrameSize();
                frame = new VSizeFrame(ctx, FrameHelper.calcAlignedFrameSizeToStore(tb.getFieldEndOffsets().length, tb.getSize(), ctx.getInitialFrameSize()));
                appender.reset(frame, true);
                assertTrue(appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize()));
            }
            keyValuePair.put(key, value);
        }
    }
    if (appender.getTupleCount() > 0) {
        frameList.add(frame);
    }
}

Also used : FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) HashMap(java.util.HashMap) Map(java.util.Map) GroupVSizeFrame(org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame)

Example 85 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

the class ReplicateOperatorTest method test.

@Test
public void test() throws Exception {
    final int outputArity = 2;
    JobSpecification spec = new JobSpecification();
    String inputFileName = "data" + File.separator + "nc1" + File.separator + "words.txt";
    File[] outputFile = new File[outputArity];
    for (int i = 0; i < outputArity; i++) {
        outputFile[i] = File.createTempFile("replicateop", null);
        outputFile[i].deleteOnExit();
    }
    FileSplit[] inputSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, inputFileName) };
    String[] locations = new String[] { NC1_ID };
    DelimitedDataTupleParserFactory stringParser = new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, '');
    RecordDescriptor stringRec = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(spec, new ConstantFileSplitProvider(inputSplits), stringParser, stringRec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanOp, locations);
    ReplicateOperatorDescriptor replicateOp = new ReplicateOperatorDescriptor(spec, stringRec, outputArity);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, replicateOp, locations);
    IOperatorDescriptor[] outputOp = new IOperatorDescriptor[outputFile.length];
    for (int i = 0; i < outputArity; i++) {
        ResultSetId rsId = new ResultSetId(i);
        spec.addResultSetId(rsId);
        outputOp[i] = new ResultWriterOperatorDescriptor(spec, rsId, true, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, outputOp[i], locations);
    }
    spec.connect(new OneToOneConnectorDescriptor(spec), scanOp, 0, replicateOp, 0);
    for (int i = 0; i < outputArity; i++) {
        spec.connect(new OneToOneConnectorDescriptor(spec), replicateOp, i, outputOp[i], 0);
    }
    for (int i = 0; i < outputArity; i++) {
        spec.addRoot(outputOp[i]);
    }
    String[] expectedResultsFileNames = new String[outputArity];
    for (int i = 0; i < outputArity; i++) {
        expectedResultsFileNames[i] = "data" + File.separator + "device0" + File.separator + inputFileName;
    }
    runTestAndCompareResults(spec, expectedResultsFileNames);
}

Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) ReplicateOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ReplicateOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) File(java.io.File) Test(org.junit.Test)

Aggregations

UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)94 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)74 Test (org.junit.Test)69 JobSpecification (org.apache.hyracks.api.job.JobSpecification)67 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)62 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)51 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)48 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)45 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)37 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)37 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)36 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)34 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)34 FileSplit (org.apache.hyracks.api.io.FileSplit)33 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)33 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)31 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)28 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)26 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)24 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)24