Example 1 with MToNPartitioningMergingConnectorDescriptor

Use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor in project asterixdb by Apache.

From class HeapSortMergeTest, method optimizedSortMergeTest01:

@Test
public void optimizedSortMergeTest01() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] {
            new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"),
            new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    // All nine fields of the orders records are treated as UTF-8 strings.
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] {
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
    // Top-K output limit for the sorter.
    int outputLimit = 5;
    TopKSorterOperatorDescriptor sorter = new TopKSorterOperatorDescriptor(spec, 4, outputLimit, new int[] { 1, 0 },
            null, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
                    PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    FileSplit fs = createFile(nc1);
    IFileSplitProvider outputSplitProvider = new ConstantFileSplitProvider(new FileSplit[] { fs });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outputSplitProvider, "|");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
    // Hash-partition on fields {1, 0} and merge the incoming streams so the sorted order is preserved.
    spec.connect(new MToNPartitioningMergingConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 1, 0 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }),
            new int[] { 1, 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
                    PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new UTF8StringNormalizedKeyComputerFactory()), sorter, 0, printer, 0);
    runTest(spec);
    System.out.println("Result write into :" + fs.getPath() + " in node: " + fs.getNodeName());
}
Also used: UTF8StringNormalizedKeyComputerFactory (org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory), IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), MToNPartitioningMergingConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor), ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider), IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory), DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit), FileSplit (org.apache.hyracks.api.io.FileSplit), UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory), TopKSorterOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.TopKSorterOperatorDescriptor), ResultSetId (org.apache.hyracks.api.dataset.ResultSetId), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), PlainFileWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor), FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), Test (org.junit.Test)
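
Across all of the examples on this page the connector is constructed the same way. A minimal annotated sketch of the argument roles, inferred from these call sites (the variable names below are placeholders, not identifiers from the asterixdb source):

IConnectorDescriptor conn = new MToNPartitioningMergingConnectorDescriptor(
        spec,              // the job specification the connector is registered with
        partitionComputer, // routes each tuple to a target partition (hash-based here, range-based in Example 3)
        sortFields,        // the field indexes whose sort order the merge preserves
        comparators,       // one binary comparator factory per sort field
        nkcFactory);       // optional normalized-key computer; null in Examples 4 and 5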

Example 2 with MToNPartitioningMergingConnectorDescriptor

Use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor in project asterixdb by Apache.

From class HeapSortMergeTest, method createSortMergeJobSpec:

public static JobSpecification createSortMergeJobSpec() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] {
            new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"),
            new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] {
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
    int outputLimit = 20;
    TopKSorterOperatorDescriptor sorter = new TopKSorterOperatorDescriptor(spec, 4, outputLimit, new int[] { 1, 0 },
            null, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
                    PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID);
    LimitOperatorDescriptor filter = new LimitOperatorDescriptor(spec, ordersDesc, outputLimit);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, filter, NC1_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
    // Same partition-and-merge wiring as Example 1, but feeding the limit operator instead of a file writer.
    spec.connect(new MToNPartitioningMergingConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 1, 0 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }),
            new int[] { 1, 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
                    PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new UTF8StringNormalizedKeyComputerFactory()), sorter, 0, filter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), filter, 0, printer, 0);
    return spec;
}
Also used: UTF8StringNormalizedKeyComputerFactory (org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory), IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), MToNPartitioningMergingConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor), ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider), IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory), DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit), FileSplit (org.apache.hyracks.api.io.FileSplit), UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory), ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor), TopKSorterOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.TopKSorterOperatorDescriptor), LimitOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.LimitOperatorDescriptor), ResultSetId (org.apache.hyracks.api.dataset.ResultSetId), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification)
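
createSortMergeJobSpec() only builds the job; actually running it requires a client connection like the one used in Examples 4 and 5. A sketch, assuming a cluster controller reachable at a placeholder host and port:

IHyracksClientConnection client = new HyracksConnection("localhost", 1098); // placeholder host and port
JobId jobId = client.startJob(HeapSortMergeTest.createSortMergeJobSpec());
client.waitForCompletion(jobId);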

Example 3 with MToNPartitioningMergingConnectorDescriptor

Use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor in project asterixdb by Apache.

From class RangePartitionMergeExchangePOperator, method createConnectorDescriptor:

@Override
public Pair<IConnectorDescriptor, TargetConstraint> createConnectorDescriptor(IConnectorDescriptorRegistry spec,
        ILogicalOperator op, IOperatorSchema opSchema, JobGenContext context) throws AlgebricksException {
    int n = partitioningFields.size();
    int[] sortFields = new int[n];
    IBinaryComparatorFactory[] comps = new IBinaryComparatorFactory[n];
    INormalizedKeyComputerFactoryProvider nkcfProvider = context.getNormalizedKeyComputerFactoryProvider();
    INormalizedKeyComputerFactory nkcf = null;
    IVariableTypeEnvironment env = context.getTypeEnvironment(op);
    int i = 0;
    for (OrderColumn oc : partitioningFields) {
        LogicalVariable var = oc.getColumn();
        sortFields[i] = opSchema.findVariable(var);
        Object type = env.getVarType(var);
        OrderKind order = oc.getOrder();
        if (i == 0 && nkcfProvider != null && type != null) {
            nkcf = nkcfProvider.getNormalizedKeyComputerFactory(type, order == OrderKind.ASC);
        }
        IBinaryComparatorFactoryProvider bcfp = context.getBinaryComparatorFactoryProvider();
        comps[i] = bcfp.getBinaryComparatorFactory(type, oc.getOrder() == OrderKind.ASC);
        i++;
    }
    ITuplePartitionComputerFactory tpcf = new FieldRangePartitionComputerFactory(sortFields, comps, rangeMap);
    IConnectorDescriptor conn = new MToNPartitioningMergingConnectorDescriptor(spec, tpcf, sortFields, comps, nkcf);
    return new Pair<IConnectorDescriptor, TargetConstraint>(conn, null);
}
Also used: LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable), ITuplePartitionComputerFactory (org.apache.hyracks.api.dataflow.value.ITuplePartitionComputerFactory), IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), FieldRangePartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.range.FieldRangePartitionComputerFactory), MToNPartitioningMergingConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor), OrderColumn (org.apache.hyracks.algebricks.core.algebra.properties.OrderColumn), IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory), IBinaryComparatorFactoryProvider (org.apache.hyracks.algebricks.data.IBinaryComparatorFactoryProvider), TargetConstraint (org.apache.hyracks.algebricks.core.algebra.base.IHyracksJobBuilder.TargetConstraint), INormalizedKeyComputerFactory (org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory), OrderKind (org.apache.hyracks.algebricks.core.algebra.operators.logical.OrderOperator.IOrder.OrderKind), INormalizedKeyComputerFactoryProvider (org.apache.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider), IVariableTypeEnvironment (org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment), Pair (org.apache.hyracks.algebricks.common.utils.Pair)
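
For intuition about what the FieldRangePartitionComputerFactory contributes here: a range partitioner routes each tuple by comparing its sort key against the split points of the range map, so globally sorted output falls out of locally sorted, range-partitioned streams. A self-contained sketch over plain integer keys (an illustration only, not the Hyracks implementation, which compares serialized tuple fields using the supplied comparator factories):

// Hypothetical illustration: binary-search the key against sorted split points.
// Partition 0 receives keys up to splitPoints[0]; partition k receives keys greater
// than splitPoints[k-1] and at most splitPoints[k]; keys above the last split point
// land in the final partition (index splitPoints.length).
static int rangePartition(int key, int[] splitPoints) {
    int idx = java.util.Arrays.binarySearch(splitPoints, key);
    // binarySearch returns -(insertionPoint) - 1 when the key is absent
    return idx >= 0 ? idx : -(idx + 1);
}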

Example 4 with MToNPartitioningMergingConnectorDescriptor

Use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor in project asterixdb by Apache.

From class DataflowTest (Hadoop mapred API), method testHDFSReadWriteOperators:

/**
 * Test a job with only HDFS reads and writes.
 *
 * @throws Exception
 */
public void testHDFSReadWriteOperators() throws Exception {
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
    conf.setInputFormat(TextInputFormat.class);
    Scheduler scheduler = new Scheduler(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, numberOfNC * 4);
    String[] readSchedule = scheduler.getLocationConstraints(splits);
    JobSpecification jobSpec = new JobSpecification();
    RecordDescriptor recordDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID, HyracksUtils.NC2_ID };
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(jobSpec, recordDesc, conf, splits, readSchedule, new TextKeyValueParserFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, readOperator, locations);
    ExternalSortOperatorDescriptor sortOperator = new ExternalSortOperatorDescriptor(jobSpec, 10, new int[] { 0 },
            new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, recordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, sortOperator, locations);
    HDFSWriteOperatorDescriptor writeOperator = new HDFSWriteOperatorDescriptor(jobSpec, conf, new TextTupleWriterFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, writeOperator, HyracksUtils.NC1_ID);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), readOperator, 0, sortOperator, 0);
    // The final argument (the normalized-key computer) is null; the merge relies on the raw binary comparator alone.
    jobSpec.connect(new MToNPartitioningMergingConnectorDescriptor(jobSpec,
            new FieldHashPartitionComputerFactory(new int[] { 0 },
                    new IBinaryHashFunctionFactory[] { RawBinaryHashFunctionFactory.INSTANCE }),
            new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, null),
            sortOperator, 0, writeOperator, 0);
    jobSpec.addRoot(writeOperator);
    IHyracksClientConnection client = new HyracksConnection(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    JobId jobId = client.startJob(jobSpec);
    client.waitForCompletion(jobId);
    Assert.assertEquals(true, checkResults());
}
Also used: Path (org.apache.hadoop.fs.Path), IHyracksClientConnection (org.apache.hyracks.api.client.IHyracksClientConnection), Scheduler (org.apache.hyracks.hdfs.scheduler.Scheduler), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), MToNPartitioningMergingConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor), IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory), TextTupleWriterFactory (org.apache.hyracks.hdfs.lib.TextTupleWriterFactory), ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), HyracksConnection (org.apache.hyracks.api.client.HyracksConnection), InputSplit (org.apache.hadoop.mapred.InputSplit), TextKeyValueParserFactory (org.apache.hyracks.hdfs.lib.TextKeyValueParserFactory), JobId (org.apache.hyracks.api.job.JobId)

Example 5 with MToNPartitioningMergingConnectorDescriptor

Use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor in project asterixdb by Apache.

From class DataflowTest (Hadoop mapreduce API), method testHDFSReadWriteOperators:

/**
 * Test a job with only HDFS reads and writes.
 *
 * @throws Exception
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public void testHDFSReadWriteOperators() throws Exception {
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
    conf.setInputFormatClass(TextInputFormat.class);
    Scheduler scheduler = new Scheduler(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    InputFormat inputFormat = ReflectionUtils.newInstance(conf.getInputFormatClass(), getConfiguration());
    List<InputSplit> splits = inputFormat.getSplits(conf);
    String[] readSchedule = scheduler.getLocationConstraints(splits);
    JobSpecification jobSpec = new JobSpecification();
    RecordDescriptor recordDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID, HyracksUtils.NC2_ID };
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(jobSpec, recordDesc, conf, splits, readSchedule, new TextKeyValueParserFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, readOperator, locations);
    ExternalSortOperatorDescriptor sortOperator = new ExternalSortOperatorDescriptor(jobSpec, 10, new int[] { 0 },
            new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, recordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, sortOperator, locations);
    HDFSWriteOperatorDescriptor writeOperator = new HDFSWriteOperatorDescriptor(jobSpec, conf, new TextTupleWriterFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, writeOperator, HyracksUtils.NC1_ID);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), readOperator, 0, sortOperator, 0);
    jobSpec.connect(new MToNPartitioningMergingConnectorDescriptor(jobSpec,
            new FieldHashPartitionComputerFactory(new int[] { 0 },
                    new IBinaryHashFunctionFactory[] { RawBinaryHashFunctionFactory.INSTANCE }),
            new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, null),
            sortOperator, 0, writeOperator, 0);
    jobSpec.addRoot(writeOperator);
    IHyracksClientConnection client = new HyracksConnection(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    JobId jobId = client.startJob(jobSpec);
    client.waitForCompletion(jobId);
    Assert.assertEquals(true, checkResults());
}
Also used: Path (org.apache.hadoop.fs.Path), IHyracksClientConnection (org.apache.hyracks.api.client.IHyracksClientConnection), Scheduler (org.apache.hyracks.hdfs2.scheduler.Scheduler), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), MToNPartitioningMergingConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor), IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory), TextTupleWriterFactory (org.apache.hyracks.hdfs.lib.TextTupleWriterFactory), TextInputFormat (org.apache.hadoop.mapreduce.lib.input.TextInputFormat), FileInputFormat (org.apache.hadoop.mapreduce.lib.input.FileInputFormat), InputFormat (org.apache.hadoop.mapreduce.InputFormat), ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), HyracksConnection (org.apache.hyracks.api.client.HyracksConnection), InputSplit (org.apache.hadoop.mapreduce.InputSplit), TextKeyValueParserFactory (org.apache.hyracks.hdfs.lib.TextKeyValueParserFactory), JobId (org.apache.hyracks.api.job.JobId)
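
Examples 4 and 5 are the same test written against the two Hadoop APIs: Example 4 uses the legacy org.apache.hadoop.mapred interfaces (with the org.apache.hyracks.hdfs.scheduler.Scheduler), while Example 5 uses org.apache.hadoop.mapreduce (with the hdfs2 scheduler). The difference is visible in how the input splits are obtained, excerpted from the two examples:

// Legacy mapred API (Example 4): splits come from the JobConf itself.
conf.setInputFormat(TextInputFormat.class);
InputSplit[] splits = conf.getInputFormat().getSplits(conf, numberOfNC * 4);

// New mapreduce API (Example 5): the InputFormat is instantiated reflectively.
conf.setInputFormatClass(TextInputFormat.class);
InputFormat inputFormat = ReflectionUtils.newInstance(conf.getInputFormatClass(), getConfiguration());
List<InputSplit> splits = inputFormat.getSplits(conf);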

Aggregations

MToNPartitioningMergingConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor): 11 uses
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory): 10 uses
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 10 uses
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 8 uses
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 8 uses
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 6 uses
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 6 uses
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 6 uses
UTF8StringNormalizedKeyComputerFactory (org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory): 6 uses
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 6 uses
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 6 uses
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 6 uses
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 6 uses
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 5 uses
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 5 uses
ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor): 5 uses
ResultSetId (org.apache.hyracks.api.dataset.ResultSetId): 4 uses
FileSplit (org.apache.hyracks.api.io.FileSplit): 4 uses
Pair (org.apache.hyracks.algebricks.common.utils.Pair): 3 uses
TargetConstraint (org.apache.hyracks.algebricks.core.algebra.base.IHyracksJobBuilder.TargetConstraint): 3 uses