Examples with RecordDescriptor - org.apache.hyracks.api.dataflow.value.RecordDescriptor

Example 96 with RecordDescriptor

use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

the class SubplanPOperator method contributeRuntimeOperator.

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    SubplanOperator subplan = (SubplanOperator) op;
    if (subplan.getNestedPlans().size() != 1) {
        throw new NotImplementedException("Subplan currently works only for one nested plan with one root.");
    }
    AlgebricksPipeline[] subplans = compileSubplans(inputSchemas[0], subplan, opSchema, context);
    assert subplans.length == 1;
    AlgebricksPipeline np = subplans[0];
    RecordDescriptor inputRecordDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op.getInputs().get(0).getValue()), inputSchemas[0], context);
    IMissingWriterFactory[] missingWriterFactories = new IMissingWriterFactory[np.getOutputWidth()];
    for (int i = 0; i < missingWriterFactories.length; i++) {
        missingWriterFactories[i] = context.getMissingWriterFactory();
    }
    SubplanRuntimeFactory runtime = new SubplanRuntimeFactory(np, missingWriterFactories, inputRecordDesc, null);
    RecordDescriptor recDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
    builder.contributeMicroOperator(subplan, runtime, recDesc);
    ILogicalOperator src = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, op, 0);
}

Also used : SubplanOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) NotImplementedException(org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) IMissingWriterFactory(org.apache.hyracks.api.dataflow.value.IMissingWriterFactory) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) SubplanRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.meta.SubplanRuntimeFactory) AlgebricksPipeline(org.apache.hyracks.algebricks.runtime.base.AlgebricksPipeline)

Example 97 with RecordDescriptor

use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

the class TokenizePOperator method contributeRuntimeOperator.

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    TokenizeOperator tokenizeOp = (TokenizeOperator) op;
    if (tokenizeOp.getOperation() != Kind.INSERT || !tokenizeOp.isBulkload()) {
        throw new AlgebricksException("Tokenize Operator only works when bulk-loading data.");
    }
    IMetadataProvider mp = context.getMetadataProvider();
    IVariableTypeEnvironment typeEnv = context.getTypeEnvironment(op);
    JobSpecification spec = builder.getJobSpec();
    RecordDescriptor inputDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op.getInputs().get(0).getValue()), inputSchemas[0], context);
    Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> runtimeAndConstraints = mp.getTokenizerRuntime(dataSourceIndex, propagatedSchema, inputSchemas, typeEnv, primaryKeys, secondaryKeys, null, inputDesc, context, spec, true);
    builder.contributeHyracksOperator(tokenizeOp, runtimeAndConstraints.first);
    builder.contributeAlgebricksPartitionConstraint(runtimeAndConstraints.first, runtimeAndConstraints.second);
    ILogicalOperator src = tokenizeOp.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, tokenizeOp, 0);
}

Also used : TokenizeOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.TokenizeOperator) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IMetadataProvider(org.apache.hyracks.algebricks.core.algebra.metadata.IMetadataProvider) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment)

Example 98 with RecordDescriptor

use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

the class RunningAggregatePOperator method contributeRuntimeOperator.

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    RunningAggregateOperator ragg = (RunningAggregateOperator) op;
    List<LogicalVariable> variables = ragg.getVariables();
    List<Mutable<ILogicalExpression>> expressions = ragg.getExpressions();
    int[] outColumns = new int[variables.size()];
    for (int i = 0; i < outColumns.length; i++) {
        outColumns[i] = opSchema.findVariable(variables.get(i));
    }
    IRunningAggregateEvaluatorFactory[] runningAggFuns = new IRunningAggregateEvaluatorFactory[expressions.size()];
    IExpressionRuntimeProvider expressionRuntimeProvider = context.getExpressionRuntimeProvider();
    for (int i = 0; i < runningAggFuns.length; i++) {
        StatefulFunctionCallExpression expr = (StatefulFunctionCallExpression) expressions.get(i).getValue();
        runningAggFuns[i] = expressionRuntimeProvider.createRunningAggregateFunctionFactory(expr, context.getTypeEnvironment(op.getInputs().get(0).getValue()), inputSchemas, context);
    }
    // TODO push projections into the operator
    int[] projectionList = JobGenHelper.projectAllVariables(opSchema);
    RunningAggregateRuntimeFactory runtime = new RunningAggregateRuntimeFactory(outColumns, runningAggFuns, projectionList);
    // contribute one Asterix framewriter
    RecordDescriptor recDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
    builder.contributeMicroOperator(ragg, runtime, recDesc);
    // and contribute one edge from its child
    ILogicalOperator src = ragg.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, ragg, 0);
}

Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) RunningAggregateRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.RunningAggregateRuntimeFactory) IRunningAggregateEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IRunningAggregateEvaluatorFactory) StatefulFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.StatefulFunctionCallExpression) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) Mutable(org.apache.commons.lang3.mutable.Mutable) IExpressionRuntimeProvider(org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider) RunningAggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.RunningAggregateOperator)

Example 99 with RecordDescriptor

use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

the class SinkWritePOperator method contributeRuntimeOperator.

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    WriteOperator write = (WriteOperator) op;
    int[] columns = new int[write.getExpressions().size()];
    int i = 0;
    for (Mutable<ILogicalExpression> exprRef : write.getExpressions()) {
        ILogicalExpression expr = exprRef.getValue();
        if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
            throw new NotImplementedException("Only writing variable expressions is supported.");
        }
        VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
        LogicalVariable v = varRef.getVariableReference();
        columns[i++] = inputSchemas[0].findVariable(v);
    }
    RecordDescriptor recDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), propagatedSchema, context);
    RecordDescriptor inputDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op.getInputs().get(0).getValue()), inputSchemas[0], context);
    IPrinterFactory[] pf = JobGenHelper.mkPrinterFactories(inputSchemas[0], context.getTypeEnvironment(op), context, columns);
    IMetadataProvider<?, ?> mp = context.getMetadataProvider();
    Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> runtime = mp.getWriteFileRuntime(write.getDataSink(), columns, pf, inputDesc);
    builder.contributeMicroOperator(write, runtime.first, recDesc, runtime.second);
    ILogicalOperator src = write.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, write, 0);
}

Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) NotImplementedException(org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) IPushRuntimeFactory(org.apache.hyracks.algebricks.runtime.base.IPushRuntimeFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) IPrinterFactory(org.apache.hyracks.algebricks.data.IPrinterFactory) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) WriteOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.WriteOperator) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)

Example 100 with RecordDescriptor

use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

the class StreamLimitPOperator method contributeRuntimeOperator.

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    LimitOperator limit = (LimitOperator) op;
    IExpressionRuntimeProvider expressionRuntimeProvider = context.getExpressionRuntimeProvider();
    IVariableTypeEnvironment env = context.getTypeEnvironment(op);
    IScalarEvaluatorFactory maxObjectsFact = expressionRuntimeProvider.createEvaluatorFactory(limit.getMaxObjects().getValue(), env, inputSchemas, context);
    ILogicalExpression offsetExpr = limit.getOffset().getValue();
    IScalarEvaluatorFactory offsetFact = (offsetExpr == null) ? null : expressionRuntimeProvider.createEvaluatorFactory(offsetExpr, env, inputSchemas, context);
    RecordDescriptor recDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), propagatedSchema, context);
    StreamLimitRuntimeFactory runtime = new StreamLimitRuntimeFactory(maxObjectsFact, offsetFact, null, context.getBinaryIntegerInspectorFactory());
    builder.contributeMicroOperator(limit, runtime, recDesc);
    // and contribute one edge from its child
    ILogicalOperator src = limit.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, limit, 0);
}

Also used : IExpressionRuntimeProvider(org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) LimitOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.LimitOperator) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) IScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory) StreamLimitRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.StreamLimitRuntimeFactory)

Aggregations

RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)169 JobSpecification (org.apache.hyracks.api.job.JobSpecification)90 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)74 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)72 Test (org.junit.Test)69 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)64 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)55 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)53 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)52 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)41 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)40 FileSplit (org.apache.hyracks.api.io.FileSplit)38 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)37 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)36 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)35 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)35 ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator)33 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)31 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)27 IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)25