Search in sources :

Example 1 with UnionAllOperatorDescriptor

use of org.apache.hyracks.dataflow.std.union.UnionAllOperatorDescriptor in project asterixdb by apache.

the class UnionAllPOperator method contributeRuntimeOperator.

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    IOperatorDescriptorRegistry spec = builder.getJobSpec();
    RecordDescriptor recordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
    // at algebricks level, union all only accepts two inputs, although at
    // hyracks
    // level, there is no restrictions
    UnionAllOperatorDescriptor opDesc = new UnionAllOperatorDescriptor(spec, 2, recordDescriptor);
    contributeOpDesc(builder, (AbstractLogicalOperator) op, opDesc);
    ILogicalOperator src1 = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src1, 0, op, 0);
    ILogicalOperator src2 = op.getInputs().get(1).getValue();
    builder.contributeGraphEdge(src2, 0, op, 1);
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) IOperatorDescriptorRegistry(org.apache.hyracks.api.job.IOperatorDescriptorRegistry) UnionAllOperatorDescriptor(org.apache.hyracks.dataflow.std.union.UnionAllOperatorDescriptor)

Example 2 with UnionAllOperatorDescriptor

use of org.apache.hyracks.dataflow.std.union.UnionAllOperatorDescriptor in project asterixdb by apache.

the class UnionTest method createUnionJobSpec.

public static JobSpecification createUnionJobSpec() throws Exception {
    JobSpecification spec = new JobSpecification();
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt"), new ManagedFileSplit(NC1_ID, "data" + File.separator + "nc1" + File.separator + "words.txt") });
    RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor csvScanner01 = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner01, NC2_ID, NC1_ID);
    FileScanOperatorDescriptor csvScanner02 = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner02, NC2_ID, NC1_ID);
    UnionAllOperatorDescriptor unionAll = new UnionAllOperatorDescriptor(spec, 2, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, unionAll, NC2_ID, NC1_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), csvScanner01, 0, unionAll, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), csvScanner02, 0, unionAll, 1);
    spec.connect(new OneToOneConnectorDescriptor(spec), unionAll, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}
Also used : IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) UnionAllOperatorDescriptor(org.apache.hyracks.dataflow.std.union.UnionAllOperatorDescriptor)

Aggregations

RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)2 UnionAllOperatorDescriptor (org.apache.hyracks.dataflow.std.union.UnionAllOperatorDescriptor)2 ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator)1 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)1 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)1 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)1 IOperatorDescriptorRegistry (org.apache.hyracks.api.job.IOperatorDescriptorRegistry)1 JobSpecification (org.apache.hyracks.api.job.JobSpecification)1 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)1 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)1 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)1 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)1 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)1 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)1 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)1 ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor)1