Examples with AlgebricksPipeline - org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline

Example 1 with AlgebricksPipeline

Use of org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline in the asterixdb project by Apache.

From the class PushRuntimeTest, method scanSortGbySelectWrite.

/**
 * Builds and runs a scan -> in-memory sort -> pre-clustered group-by -> select -> write job,
 * with every operator pinned to NC1, over the TPC-H customer table.
 *
 * <p>Tuples are sorted and grouped on field 3 (the nation id); a nested pipeline reduces each
 * group to a tuple count. The select keeps the group whose key equals 3 and projects its
 * count, which the sink prints to a file. The test expects that file to contain "9".
 */
@Test
public void scanSortGbySelectWrite() throws Exception {
    final JobSpecification jobSpec = new JobSpecification(FRAME_SIZE);
    final String nc1 = AlgebricksHyracksIntegrationUtil.NC1_ID;
    final int nationField = 3;

    // Scanner: reads the pipe-delimited customer table (eight columns).
    final String customerTable = "data" + File.separator + "tpch0.001" + File.separator + "customer.tbl";
    final FileSplit[] inputSplits = { new ManagedFileSplit(nc1, customerTable) };
    final IFileSplitProvider inputSplitProvider = new ConstantFileSplitProvider(inputSplits);
    final RecordDescriptor customerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(),
            FloatSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer() });
    final IValueParserFactory[] columnParsers = {
            IntegerParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE,
            IntegerParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE,
            FloatParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE };
    final DelimitedDataTupleParserFactory tupleParser = new DelimitedDataTupleParserFactory(columnParsers, '|');
    final FileScanOperatorDescriptor scanOp =
            new FileScanOperatorDescriptor(jobSpec, inputSplitProvider, tupleParser, customerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, scanOp, new String[] { nc1 });

    // Sort on the nation id; the tuple layout is the scanner's, so its descriptor is reused.
    final IBinaryComparatorFactory[] sortComparators =
            { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) };
    final InMemorySortOperatorDescriptor sortOp =
            new InMemorySortOperatorDescriptor(jobSpec, new int[] { nationField }, sortComparators, customerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, sortOp, new String[] { nc1 });

    // Group-by on the nation id; the nested pipeline (tuple source -> count) runs per group.
    final NestedTupleSourceRuntimeFactory nestedSource = new NestedTupleSourceRuntimeFactory();
    final IAggregateEvaluatorFactory[] countAggregates = { new TupleCountAggregateFunctionFactory() };
    final AggregateRuntimeFactory countRuntime = new AggregateRuntimeFactory(countAggregates);
    final RecordDescriptor countDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    final AlgebricksPipeline countPipeline = new AlgebricksPipeline(
            new IPushRuntimeFactory[] { nestedSource, countRuntime },
            new RecordDescriptor[] { customerDesc, countDesc });
    final NestedPlansAccumulatingAggregatorFactory groupAggregator = new NestedPlansAccumulatingAggregatorFactory(
            new AlgebricksPipeline[] { countPipeline }, new int[] { nationField }, new int[] {});
    final RecordDescriptor groupDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    final IBinaryComparatorFactory[] groupComparators =
            { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) };
    final PreclusteredGroupOperatorDescriptor groupOp = new PreclusteredGroupOperatorDescriptor(jobSpec,
            new int[] { nationField }, groupComparators, groupAggregator, groupDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, groupOp, new String[] { nc1 });

    // Algebricks meta operator: select the group with key 3 (Canadian customers), then write.
    final IntegerConstantEvalFactory wantedKey = new IntegerConstantEvalFactory(3);
    final TupleFieldEvaluatorFactory groupKey = new TupleFieldEvaluatorFactory(0);
    final IScalarEvaluatorFactory keyMatches = new IntegerEqualsEvalFactory(wantedKey, groupKey);
    final StreamSelectRuntimeFactory selectRuntime = new StreamSelectRuntimeFactory(keyMatches, new int[] { 1 },
            BinaryBooleanInspectorImpl.FACTORY, false, -1, null);
    final RecordDescriptor selectedDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    final File resultFile = new File(PATH_ACTUAL + SEPARATOR + "scanSortGbySelectWrite.out");
    final SinkWriterRuntimeFactory sinkRuntime = new SinkWriterRuntimeFactory(new int[] { 0 },
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, resultFile,
            PrinterBasedWriterFactory.INSTANCE, selectedDesc);
    final AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(jobSpec, 1, 0,
            new IPushRuntimeFactory[] { selectRuntime, sinkRuntime },
            new RecordDescriptor[] { selectedDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, metaOp, new String[] { nc1 });

    // Wire the operators one-to-one and mark the writer side as the job root.
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), scanOp, 0, sortOp, 0);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), sortOp, 0, groupOp, 0);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), groupOp, 0, metaOp, 0);
    jobSpec.addRoot(metaOp);
    AlgebricksHyracksIntegrationUtil.runJob(jobSpec);

    // Verify the written output, then remove the file.
    final StringBuilder written = new StringBuilder();
    readFileToString(resultFile, written);
    Assert.assertEquals("9", written.toString());
    resultFile.delete();
}

Also used : AggregateRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.AggregateRuntimeFactory) RunningAggregateRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.RunningAggregateRuntimeFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) TupleCountAggregateFunctionFactory(org.apache.hyracks.algebricks.runtime.aggregators.TupleCountAggregateFunctionFactory) NestedTupleSourceRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.NestedTupleSourceRuntimeFactory) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) AlgebricksPipeline(org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline) TupleFieldEvaluatorFactory(org.apache.hyracks.algebricks.runtime.evaluators.TupleFieldEvaluatorFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) InMemorySortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) NestedPlansAccumulatingAggregatorFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.NestedPlansAccumulatingAggregatorFactory) 
IScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory) StreamSelectRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.StreamSelectRuntimeFactory) SinkWriterRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory) PreclusteredGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor) File(java.io.File) Test(org.junit.Test)

Example 2 with AlgebricksPipeline

Use of org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline in the asterixdb project by Apache.

From the class PushRuntimeTest, method etsAssignSubplanProjectWrite.

/**
 * Runs a single Algebricks meta operator made of
 * empty-tuple-source -> assign -> subplan -> project -> write.
 *
 * <p>The outer assign puts the constant 400 in field 0. The subplan's nested pipeline
 * (nested tuple source -> assign -> project) adds the constant 3 to field 0 and keeps only
 * that sum. The outer project keeps field 1 and the sink prints it; the test expects "403".
 */
@Test
public void etsAssignSubplanProjectWrite() throws Exception {
    final JobSpecification jobSpec = new JobSpecification(FRAME_SIZE);
    final IntegerConstantEvalFactory baseValue = new IntegerConstantEvalFactory(400);
    final IntegerConstantEvalFactory increment = new IntegerConstantEvalFactory(3);

    // Outer pipeline head: empty tuple source followed by the assign of the base constant.
    final EmptyTupleSourceRuntimeFactory emptySource = new EmptyTupleSourceRuntimeFactory();
    final RecordDescriptor emptyDesc = new RecordDescriptor(new ISerializerDeserializer[] {});
    final AssignRuntimeFactory outerAssign = new AssignRuntimeFactory(new int[] { 0 },
            new IScalarEvaluatorFactory[] { baseValue }, new int[] { 0 });
    final RecordDescriptor outerAssignDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });

    // Nested pipeline: field 1 = field 0 + increment, then project field 1 only.
    final NestedTupleSourceRuntimeFactory nestedSource = new NestedTupleSourceRuntimeFactory();
    final IScalarEvaluatorFactory[] sumEvaluators =
            { new IntegerAddEvalFactory(new TupleFieldEvaluatorFactory(0), increment) };
    final AssignRuntimeFactory nestedAssign =
            new AssignRuntimeFactory(new int[] { 1 }, sumEvaluators, new int[] { 0, 1 });
    final RecordDescriptor nestedAssignDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    final StreamProjectRuntimeFactory nestedProject = new StreamProjectRuntimeFactory(new int[] { 1 });
    final RecordDescriptor nestedProjectDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    final AlgebricksPipeline nestedPipeline = new AlgebricksPipeline(
            new IPushRuntimeFactory[] { nestedSource, nestedAssign, nestedProject },
            new RecordDescriptor[] { outerAssignDesc, nestedAssignDesc, nestedProjectDesc });

    // Subplan wrapping the nested pipeline, then the outer projection of field 1.
    final SubplanRuntimeFactory subplanRuntime = new SubplanRuntimeFactory(nestedPipeline,
            new IMissingWriterFactory[] { NoopMissingWriterFactory.INSTANCE }, outerAssignDesc, null);
    final RecordDescriptor subplanOutDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    final StreamProjectRuntimeFactory outerProject = new StreamProjectRuntimeFactory(new int[] { 1 });
    final RecordDescriptor outerProjectDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });

    // Sink that prints the single remaining integer field to the result file.
    final File resultFile = new File(PATH_ACTUAL + SEPARATOR + "etsAssignSubplanProjectWrite.out");
    final SinkWriterRuntimeFactory sinkRuntime = new SinkWriterRuntimeFactory(new int[] { 0 },
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, resultFile,
            PrinterBasedWriterFactory.INSTANCE, outerProjectDesc);

    final AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(jobSpec, 0, 0,
            new IPushRuntimeFactory[] { emptySource, outerAssign, subplanRuntime, outerProject, sinkRuntime },
            new RecordDescriptor[] { emptyDesc, outerAssignDesc, subplanOutDesc, outerProjectDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, metaOp, DEFAULT_NODES);
    jobSpec.addRoot(metaOp);
    AlgebricksHyracksIntegrationUtil.runJob(jobSpec);

    // Verify the written output, then remove the file.
    final StringBuilder written = new StringBuilder();
    readFileToString(resultFile, written);
    Assert.assertEquals("403", written.toString());
    resultFile.delete();
}

Also used : TupleFieldEvaluatorFactory(org.apache.hyracks.algebricks.runtime.evaluators.TupleFieldEvaluatorFactory) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) SubplanRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.meta.SubplanRuntimeFactory) AssignRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.AssignRuntimeFactory) NestedTupleSourceRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.NestedTupleSourceRuntimeFactory) SinkWriterRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory) EmptyTupleSourceRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.EmptyTupleSourceRuntimeFactory) StreamProjectRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.StreamProjectRuntimeFactory) JobSpecification(org.apache.hyracks.api.job.JobSpecification) AlgebricksPipeline(org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline) File(java.io.File) Test(org.junit.Test)

Example 3 with AlgebricksPipeline

Use of org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline in the asterixdb project by Apache.

From the class PreclusteredGroupByPOperator, method contributeRuntimeOperator.

/**
 * Contributes a {@code PreclusteredGroupOperatorDescriptor} for the given group-by operator
 * to the job builder and connects it to the operator's first input.
 *
 * <p>The nested plans are compiled into pipelines first. If the first root of the first
 * nested plan is a running-aggregate operator, a running aggregator factory is used;
 * otherwise an accumulating one.
 *
 * @param builder job builder receiving the operator descriptor and the graph edge
 * @param context job generation context used for type environments and factories
 * @param op the logical operator; cast to {@code GroupByOperator}
 * @param opSchema schema of this operator's output
 * @param inputSchemas input schemas; only element 0 is read
 * @param outerPlanSchema not read by this method
 * @throws AlgebricksException declared by this method; not thrown directly here, so it can only
 *         come from the helper calls
 */
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    final int[] groupKeys = JobGenHelper.variablesToFieldIndexes(columnList, inputSchemas[0]);
    final GroupByOperator groupByOp = (GroupByOperator) op;
    final int[] fdCols = getFdColumns(groupByOp, inputSchemas[0]);

    // Compile the nested plans; this also sets the group-by operator schema accordingly.
    final AlgebricksPipeline[] nestedPipelines = compileSubplans(inputSchemas[0], groupByOp, opSchema, context);

    // The first root of the first nested plan decides which aggregator flavour is built.
    final boolean runningAggregate = groupByOp.getNestedPlans().get(0).getRoots().get(0).getValue()
            .getOperatorTag() == LogicalOperatorTag.RUNNINGAGGREGATE;
    final IAggregatorDescriptorFactory aggFactory = runningAggregate
            ? new NestedPlansRunningAggregatorFactory(nestedPipelines, groupKeys, fdCols)
            : new NestedPlansAccumulatingAggregatorFactory(nestedPipelines, groupKeys, fdCols);

    final IOperatorDescriptorRegistry registry = builder.getJobSpec();
    final IBinaryComparatorFactory[] keyComparators = JobGenHelper
            .variablesToAscBinaryComparatorFactories(columnList, context.getTypeEnvironment(op), context);
    final RecordDescriptor outputDesc =
            JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
    final PreclusteredGroupOperatorDescriptor groupDescriptor = new PreclusteredGroupOperatorDescriptor(
            registry, groupKeys, keyComparators, aggFactory, outputDesc, groupAll);
    contributeOpDesc(builder, (AbstractLogicalOperator) op, groupDescriptor);

    // Connect output 0 of the input operator to input 0 of this operator.
    final ILogicalOperator inputOp = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(inputOp, 0, op, 0);
}

Also used : GroupByOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) NestedPlansAccumulatingAggregatorFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.NestedPlansAccumulatingAggregatorFactory) IOperatorDescriptorRegistry(org.apache.hyracks.api.job.IOperatorDescriptorRegistry) IAggregatorDescriptorFactory(org.apache.hyracks.dataflow.std.group.IAggregatorDescriptorFactory) NestedPlansRunningAggregatorFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.NestedPlansRunningAggregatorFactory) PreclusteredGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor) AlgebricksPipeline(org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline)

Example 4 with AlgebricksPipeline

Use of org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline in the asterixdb project by Apache.

From the class SubplanPOperator, method contributeRuntimeOperator.

/**
 * Contributes a {@code SubplanRuntimeFactory} micro operator for the given subplan operator
 * and connects it to the operator's first input.
 *
 * <p>Exactly one nested plan is supported. One missing-writer factory, obtained from the
 * context, is supplied per output column of the compiled nested pipeline.
 *
 * @param builder job builder receiving the micro operator and the graph edge
 * @param context job generation context used for type environments and factories
 * @param op the logical operator; cast to {@code SubplanOperator}
 * @param opSchema schema of this operator's output
 * @param inputSchemas input schemas; only element 0 is read
 * @param outerPlanSchema not read by this method
 * @throws AlgebricksException declared by this method; not thrown directly here, so it can only
 *         come from the helper calls
 * @throws NotImplementedException if the operator does not have exactly one nested plan
 */
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    final SubplanOperator subplanOp = (SubplanOperator) op;
    final int nestedPlanCount = subplanOp.getNestedPlans().size();
    if (nestedPlanCount != 1) {
        throw new NotImplementedException("Subplan currently works only for one nested plan with one root.");
    }

    final AlgebricksPipeline[] compiled = compileSubplans(inputSchemas[0], subplanOp, opSchema, context);
    assert compiled.length == 1;
    final AlgebricksPipeline nestedPipeline = compiled[0];

    final RecordDescriptor inputDesc = JobGenHelper.mkRecordDescriptor(
            context.getTypeEnvironment(op.getInputs().get(0).getValue()), inputSchemas[0], context);

    // One missing-writer factory per output column of the nested pipeline.
    final IMissingWriterFactory[] missingWriters = new IMissingWriterFactory[nestedPipeline.getOutputWidth()];
    int column = 0;
    while (column < missingWriters.length) {
        missingWriters[column] = context.getMissingWriterFactory();
        column++;
    }

    final SubplanRuntimeFactory subplanRuntime =
            new SubplanRuntimeFactory(nestedPipeline, missingWriters, inputDesc, null);
    final RecordDescriptor outputDesc =
            JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
    builder.contributeMicroOperator(subplanOp, subplanRuntime, outputDesc);

    // Connect output 0 of the input operator to input 0 of this operator.
    final ILogicalOperator inputOp = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(inputOp, 0, op, 0);
}

Also used : SubplanOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) NotImplementedException(org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) IMissingWriterFactory(org.apache.hyracks.api.dataflow.value.IMissingWriterFactory) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) SubplanRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.meta.SubplanRuntimeFactory) AlgebricksPipeline(org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline)

Example 5 with AlgebricksPipeline

Use of org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline in the asterixdb project by Apache.

From the class AbstractPhysicalOperator, method compileSubplans.

/**
 * Compiles every nested plan of the given operator into an {@code AlgebricksPipeline}.
 *
 * <p>A single {@code PlanCompiler} is created from the context and shared across all nested
 * plans. The returned array has one entry per nested plan, in iteration order.
 *
 * @param outerPlanSchema schema passed through to {@code buildPipelineWithProjection}
 * @param npOp operator whose nested plans are compiled
 * @param opSchema schema passed through to {@code buildPipelineWithProjection}
 * @param context job generation context used to create the plan compiler
 * @return the compiled pipelines, one per nested plan
 * @throws AlgebricksException declared by this method; not thrown directly here, so it can only
 *         come from the helper calls
 */
protected AlgebricksPipeline[] compileSubplans(IOperatorSchema outerPlanSchema, AbstractOperatorWithNestedPlans npOp, IOperatorSchema opSchema, JobGenContext context) throws AlgebricksException {
    final int planCount = npOp.getNestedPlans().size();
    final AlgebricksPipeline[] pipelines = new AlgebricksPipeline[planCount];
    final PlanCompiler compiler = new PlanCompiler(context);
    int slot = 0;
    for (ILogicalPlan nestedPlan : npOp.getNestedPlans()) {
        pipelines[slot] = buildPipelineWithProjection(nestedPlan, outerPlanSchema, npOp, opSchema, compiler);
        slot++;
    }
    return pipelines;
}

Also used : PlanCompiler(org.apache.hyracks.algebricks.core.jobgen.impl.PlanCompiler) ILogicalPlan(org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan) AlgebricksPipeline(org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AlgebricksCountPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksCountPartitionConstraint)

Aggregations

AlgebricksPipeline (org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline): 7; RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 6; NestedPlansAccumulatingAggregatorFactory (org.apache.hyracks.algebricks.runtime.operators.aggreg.NestedPlansAccumulatingAggregatorFactory): 4; File (java.io.File): 3; ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator): 3; TupleFieldEvaluatorFactory (org.apache.hyracks.algebricks.runtime.evaluators.TupleFieldEvaluatorFactory): 3; AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor): 3; NestedTupleSourceRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.NestedTupleSourceRuntimeFactory): 3; SinkWriterRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory): 3; JobSpecification (org.apache.hyracks.api.job.JobSpecification): 3; Test (org.junit.Test): 3; GroupByOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator): 2; TupleCountAggregateFunctionFactory (org.apache.hyracks.algebricks.runtime.aggregators.TupleCountAggregateFunctionFactory): 2; IScalarEvaluatorFactory (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory): 2; AggregateRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.aggreg.AggregateRuntimeFactory): 2; MicroPreClusteredGroupRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.group.MicroPreClusteredGroupRuntimeFactory): 2; SubplanRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.meta.SubplanRuntimeFactory): 2; RunningAggregateRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.RunningAggregateRuntimeFactory): 2; StreamSelectRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.StreamSelectRuntimeFactory): 2; IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory): 2