
Example 1 with IOperatorSchema

Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema in project asterixdb by apache.

The class MetadataProvider, method getTokenizerRuntime:

@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getTokenizerRuntime(IDataSourceIndex<String, DataSourceId> dataSourceIndex, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, boolean bulkload) throws AlgebricksException {
    String indexName = dataSourceIndex.getId();
    String dataverseName = dataSourceIndex.getDataSource().getId().getDataverseName();
    String datasetName = dataSourceIndex.getDataSource().getId().getDatasourceName();
    IOperatorSchema inputSchema;
    if (inputSchemas.length > 0) {
        inputSchema = inputSchemas[0];
    } else {
        throw new AlgebricksException("TokenizeOperator can not operate without any input variable.");
    }
    Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
    Index secondaryIndex;
    try {
        secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName);
    } catch (MetadataException e) {
        throw new AlgebricksException(e);
    }
    // TokenizeOperator only supports a keyword or n-gram index.
    switch(secondaryIndex.getIndexType()) {
        case SINGLE_PARTITION_WORD_INVIX:
        case SINGLE_PARTITION_NGRAM_INVIX:
        case LENGTH_PARTITIONED_WORD_INVIX:
        case LENGTH_PARTITIONED_NGRAM_INVIX:
            return getBinaryTokenizerRuntime(dataverseName, datasetName, indexName, inputSchema, propagatedSchema, primaryKeys, secondaryKeys, recordDesc, spec, secondaryIndex.getIndexType());
        default:
            throw new AlgebricksException("Currently, we do not support TokenizeOperator for the index type: " + secondaryIndex.getIndexType());
    }
}
Also used : IOperatorSchema(org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema) Dataset(org.apache.asterix.metadata.entities.Dataset) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) Index(org.apache.asterix.metadata.entities.Index) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) MetadataException(org.apache.asterix.metadata.MetadataException)
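
The IOperatorSchema parameters matter here mainly as a mapping from logical variables to physical field positions in the incoming tuples. The private getBinaryTokenizerRuntime this method delegates to is not shown; the following is only a minimal sketch of that mapping, reusing the JobGenHelper.variablesToFieldIndexes helper that appears in Example 5, and is not taken from the project source.

// Sketch only: translate the key variables into field indexes against the
// tokenizer's input schema, which is what an operator descriptor ultimately needs.
int[] primaryKeyFields = JobGenHelper.variablesToFieldIndexes(primaryKeys, inputSchema);
int[] secondaryKeyFields = JobGenHelper.variablesToFieldIndexes(secondaryKeys, inputSchema);
// Each entry is the column position of the corresponding LogicalVariable in the
// tuples flowing into the TokenizeOperator.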

Example 2 with IOperatorSchema

Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema in project asterixdb by apache.

The class NestedLoopJoinPOperator, method contributeRuntimeOperator:

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    AbstractBinaryJoinOperator join = (AbstractBinaryJoinOperator) op;
    RecordDescriptor recDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), propagatedSchema, context);
    IOperatorSchema[] conditionInputSchemas = new IOperatorSchema[1];
    conditionInputSchemas[0] = propagatedSchema;
    IExpressionRuntimeProvider expressionRuntimeProvider = context.getExpressionRuntimeProvider();
    IScalarEvaluatorFactory cond = expressionRuntimeProvider.createEvaluatorFactory(join.getCondition().getValue(), context.getTypeEnvironment(op), conditionInputSchemas, context);
    ITuplePairComparatorFactory comparatorFactory = new TuplePairEvaluatorFactory(cond, context.getBinaryBooleanInspectorFactory());
    IOperatorDescriptorRegistry spec = builder.getJobSpec();
    IOperatorDescriptor opDesc = null;
    switch(kind) {
        case INNER:
            {
                opDesc = new NestedLoopJoinOperatorDescriptor(spec, comparatorFactory, recDescriptor, memSize, false, null);
                break;
            }
        case LEFT_OUTER:
            {
                IMissingWriterFactory[] nonMatchWriterFactories = new IMissingWriterFactory[inputSchemas[1].getSize()];
                for (int j = 0; j < nonMatchWriterFactories.length; j++) {
                    nonMatchWriterFactories[j] = context.getMissingWriterFactory();
                }
                opDesc = new NestedLoopJoinOperatorDescriptor(spec, comparatorFactory, recDescriptor, memSize, true, nonMatchWriterFactories);
                break;
            }
        default:
            {
                throw new NotImplementedException();
            }
    }
    contributeOpDesc(builder, (AbstractLogicalOperator) op, opDesc);
    ILogicalOperator src1 = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src1, 0, op, 0);
    ILogicalOperator src2 = op.getInputs().get(1).getValue();
    builder.contributeGraphEdge(src2, 0, op, 1);
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IOperatorSchema(org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema) NotImplementedException(org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) IOperatorDescriptorRegistry(org.apache.hyracks.api.job.IOperatorDescriptorRegistry) AbstractBinaryJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator) IScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory) IExpressionRuntimeProvider(org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) NestedLoopJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.NestedLoopJoinOperatorDescriptor) ITuplePairComparatorFactory(org.apache.hyracks.api.dataflow.value.ITuplePairComparatorFactory)
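
Note that the join condition is compiled against a one-element schema array holding propagatedSchema, i.e. the concatenation of both branches, so variables coming from the inner input resolve to positions offset by the outer input's width. A rough sketch of that relationship, using hypothetical variables joinVarOuter and joinVarInner assumed to live in inputSchemas[0] and inputSchemas[1] respectively:

// Illustration only: how field positions line up once both inputs are
// propagated into a single schema (outer variables first, then inner ones).
int posInOuter = inputSchemas[0].findVariable(joinVarOuter);
int posInInner = inputSchemas[1].findVariable(joinVarInner);
int posInJoined = propagatedSchema.findVariable(joinVarInner);
// If the join propagates all outer variables followed by all inner variables,
// posInJoined should equal inputSchemas[0].getSize() + posInInner.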

Example 3 with IOperatorSchema

Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema in project asterixdb by apache.

The class AbstractPhysicalOperator, method buildPipelineWithProjection:

private AlgebricksPipeline buildPipelineWithProjection(ILogicalPlan p, IOperatorSchema outerPlanSchema, AbstractOperatorWithNestedPlans npOp, IOperatorSchema opSchema, PlanCompiler pc) throws AlgebricksException {
    if (p.getRoots().size() > 1) {
        throw new NotImplementedException("Nested plans with several roots are not supported.");
    }
    JobSpecification nestedJob = pc.compilePlan(p, outerPlanSchema, null);
    ILogicalOperator topOpInSubplan = p.getRoots().get(0).getValue();
    JobGenContext context = pc.getContext();
    IOperatorSchema topOpInSubplanScm = context.getSchema(topOpInSubplan);
    opSchema.addAllVariables(topOpInSubplanScm);
    Map<OperatorDescriptorId, IOperatorDescriptor> opMap = nestedJob.getOperatorMap();
    if (opMap.size() != 1) {
        throw new AlgebricksException("Attempting to construct a nested plan with " + opMap.size() + " operator descriptors. Currently, nested plans can only consist in linear pipelines of Asterix micro operators.");
    }
    for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> opEntry : opMap.entrySet()) {
        IOperatorDescriptor opd = opEntry.getValue();
        if (!(opd instanceof AlgebricksMetaOperatorDescriptor)) {
            throw new AlgebricksException("Can only generate Hyracks jobs for pipelinable Asterix nested plans, not for " + opd.getClass().getName());
        }
        AlgebricksMetaOperatorDescriptor amod = (AlgebricksMetaOperatorDescriptor) opd;
        return amod.getPipeline();
    // we suppose that the top operator in the subplan already does the
    // projection for us
    }
    throw new IllegalStateException();
}
Also used : OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) IOperatorSchema(org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema) NotImplementedException(org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Map(java.util.Map) JobGenContext(org.apache.hyracks.algebricks.core.jobgen.impl.JobGenContext)
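
The only schema manipulation here is opSchema.addAllVariables(topOpInSubplanScm), which appends the nested plan's output variables after whatever the outer operator already exposes. A toy sketch of that ordering with OperatorSchemaImpl (the variable ids are made up, and the LogicalVariable(int) constructor is assumed to be available):

// Sketch only: addAllVariables appends, so subplan outputs come after outer fields.
LogicalVariable outerVar = new LogicalVariable(10);   // hypothetical outer-plan variable
LogicalVariable subplanVar = new LogicalVariable(20); // hypothetical subplan output

IOperatorSchema outer = new OperatorSchemaImpl();
outer.addVariable(outerVar);                          // field 0

IOperatorSchema subplanTop = new OperatorSchemaImpl();
subplanTop.addVariable(subplanVar);

outer.addAllVariables(subplanTop);
// outer now maps outerVar to field 0 and subplanVar to field 1,
// i.e. outer.findVariable(subplanVar) == 1.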

Example 4 with IOperatorSchema

Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema in project asterixdb by apache.

The class VariablePropagationPolicy, method concat:

public static VariablePropagationPolicy concat(final VariablePropagationPolicy... policies) {
    return new VariablePropagationPolicy() {

        @Override
        public void propagateVariables(IOperatorSchema target, IOperatorSchema... sources) throws AlgebricksException {
            if (policies.length != sources.length) {
                throw new IllegalArgumentException();
            }
            for (int i = 0; i < policies.length; ++i) {
                VariablePropagationPolicy p = policies[i];
                IOperatorSchema s = sources[i];
                p.propagateVariables(target, s);
            }
        }
    };
}
Also used : IOperatorSchema(org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema)
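
As a rough usage sketch (not taken from the project), concat pairs each policy with the source schema at the same index, so the call below hands left to the first policy and right to the second. OperatorSchemaImpl and the LogicalVariable(int) constructor are assumed to be available as in Example 5, and the final call would sit in a method that declares AlgebricksException.

// Sketch only: combine two copy-everything policies and propagate one source each.
VariablePropagationPolicy copyAll = new VariablePropagationPolicy() {
    @Override
    public void propagateVariables(IOperatorSchema target, IOperatorSchema... sources) throws AlgebricksException {
        for (IOperatorSchema s : sources) {
            target.addAllVariables(s);
        }
    }
};
VariablePropagationPolicy combined = VariablePropagationPolicy.concat(copyAll, copyAll);

IOperatorSchema left = new OperatorSchemaImpl();
left.addVariable(new LogicalVariable(1));
IOperatorSchema right = new OperatorSchemaImpl();
right.addVariable(new LogicalVariable(2));

IOperatorSchema target = new OperatorSchemaImpl();
combined.propagateVariables(target, left, right); // policies.length must equal sources.length
// target now carries both variables: target.getSize() == 2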

Example 5 with IOperatorSchema

Use of org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema in project asterixdb by apache.

The class SortGroupByPOperator, method contributeRuntimeOperator:

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    List<LogicalVariable> gbyCols = getGbyColumns();
    int[] keys = JobGenHelper.variablesToFieldIndexes(gbyCols, inputSchemas[0]);
    GroupByOperator gby = (GroupByOperator) op;
    int numFds = gby.getDecorList().size();
    int[] fdColumns = new int[numFds];
    int j = 0;
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
        ILogicalExpression expr = p.second.getValue();
        if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
            throw new AlgebricksException("Sort group-by expects variable references.");
        }
        VariableReferenceExpression v = (VariableReferenceExpression) expr;
        LogicalVariable decor = v.getVariableReference();
        fdColumns[j++] = inputSchemas[0].findVariable(decor);
    }
    if (gby.getNestedPlans().size() != 1) {
        throw new AlgebricksException("Sort group-by currently works only for one nested plan with one root containing" + "an aggregate and a nested-tuple-source.");
    }
    ILogicalPlan p0 = gby.getNestedPlans().get(0);
    if (p0.getRoots().size() != 1) {
        throw new AlgebricksException("Sort group-by currently works only for one nested plan with one root containing" + "an aggregate and a nested-tuple-source.");
    }
    Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
    AggregateOperator aggOp = (AggregateOperator) r0.getValue();
    IPartialAggregationTypeComputer partialAggregationTypeComputer = context.getPartialAggregationTypeComputer();
    List<Object> intermediateTypes = new ArrayList<Object>();
    int n = aggOp.getExpressions().size();
    IAggregateEvaluatorFactory[] aff = new IAggregateEvaluatorFactory[n];
    int i = 0;
    IExpressionRuntimeProvider expressionRuntimeProvider = context.getExpressionRuntimeProvider();
    IVariableTypeEnvironment aggOpInputEnv = context.getTypeEnvironment(aggOp.getInputs().get(0).getValue());
    IVariableTypeEnvironment outputEnv = context.getTypeEnvironment(op);
    for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
        AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) exprRef.getValue();
        aff[i++] = expressionRuntimeProvider.createAggregateFunctionFactory(aggFun, aggOpInputEnv, inputSchemas, context);
        intermediateTypes.add(partialAggregationTypeComputer.getType(aggFun, aggOpInputEnv, context.getMetadataProvider()));
    }
    int[] keyAndDecFields = new int[keys.length + fdColumns.length];
    for (i = 0; i < keys.length; ++i) {
        keyAndDecFields[i] = keys[i];
    }
    for (i = 0; i < fdColumns.length; i++) {
        keyAndDecFields[keys.length + i] = fdColumns[i];
    }
    List<LogicalVariable> keyAndDecVariables = new ArrayList<LogicalVariable>();
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getGroupByList()) {
        keyAndDecVariables.add(p.first);
    }
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
        keyAndDecVariables.add(GroupByOperator.getDecorVariable(p));
    }
    for (LogicalVariable var : keyAndDecVariables) {
        aggOpInputEnv.setVarType(var, outputEnv.getVarType(var));
    }
    compileSubplans(inputSchemas[0], gby, opSchema, context);
    IOperatorDescriptorRegistry spec = builder.getJobSpec();
    IBinaryComparatorFactory[] compFactories = new IBinaryComparatorFactory[gbyCols.size()];
    IBinaryComparatorFactoryProvider bcfProvider = context.getBinaryComparatorFactoryProvider();
    i = 0;
    for (LogicalVariable v : gbyCols) {
        Object type = aggOpInputEnv.getVarType(v);
        if (orderColumns[i].getOrder() == OrderKind.ASC) {
            compFactories[i] = bcfProvider.getBinaryComparatorFactory(type, true);
        } else {
            compFactories[i] = bcfProvider.getBinaryComparatorFactory(type, false);
        }
        i++;
    }
    RecordDescriptor recordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
    IAggregateEvaluatorFactory[] merges = new IAggregateEvaluatorFactory[n];
    List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
    IOperatorSchema[] localInputSchemas = new IOperatorSchema[1];
    localInputSchemas[0] = new OperatorSchemaImpl();
    for (i = 0; i < n; i++) {
        AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
        aggFun.getUsedVariables(usedVars);
    }
    i = 0;
    for (Object type : intermediateTypes) {
        aggOpInputEnv.setVarType(usedVars.get(i++), type);
    }
    for (LogicalVariable keyVar : keyAndDecVariables) {
        localInputSchemas[0].addVariable(keyVar);
    }
    for (LogicalVariable usedVar : usedVars) {
        localInputSchemas[0].addVariable(usedVar);
    }
    for (i = 0; i < n; i++) {
        AggregateFunctionCallExpression mergeFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
        merges[i] = expressionRuntimeProvider.createAggregateFunctionFactory(mergeFun, aggOpInputEnv, localInputSchemas, context);
    }
    RecordDescriptor partialAggRecordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), localInputSchemas[0], context);
    IAggregatorDescriptorFactory aggregatorFactory = new SimpleAlgebricksAccumulatingAggregatorFactory(aff, keyAndDecFields);
    IAggregatorDescriptorFactory mergeFactory = new SimpleAlgebricksAccumulatingAggregatorFactory(merges, keyAndDecFields);
    INormalizedKeyComputerFactory normalizedKeyFactory = null;
    INormalizedKeyComputerFactoryProvider nkcfProvider = context.getNormalizedKeyComputerFactoryProvider();
    // Build a normalized-key factory only when a provider is available (avoids an NPE when it is null).
    if (nkcfProvider != null) {
        Object type = aggOpInputEnv.getVarType(gbyCols.get(0));
        normalizedKeyFactory = orderColumns[0].getOrder() == OrderKind.ASC ? nkcfProvider.getNormalizedKeyComputerFactory(type, true)
                : nkcfProvider.getNormalizedKeyComputerFactory(type, false);
    }
    SortGroupByOperatorDescriptor gbyOpDesc = new SortGroupByOperatorDescriptor(spec, frameLimit, keys, keyAndDecFields, normalizedKeyFactory, compFactories, aggregatorFactory, mergeFactory, partialAggRecordDescriptor, recordDescriptor, false);
    contributeOpDesc(builder, gby, gbyOpDesc);
    ILogicalOperator src = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, op, 0);
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IOperatorSchema(org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema) ArrayList(java.util.ArrayList) SimpleAlgebricksAccumulatingAggregatorFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.SimpleAlgebricksAccumulatingAggregatorFactory) IAggregateEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IAggregateEvaluatorFactory) IBinaryComparatorFactoryProvider(org.apache.hyracks.algebricks.data.IBinaryComparatorFactoryProvider) IExpressionRuntimeProvider(org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider) AggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator) IPartialAggregationTypeComputer(org.apache.hyracks.algebricks.core.algebra.expressions.IPartialAggregationTypeComputer) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AggregateFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression) GroupByOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) IOperatorDescriptorRegistry(org.apache.hyracks.api.job.IOperatorDescriptorRegistry) OperatorSchemaImpl(org.apache.hyracks.algebricks.core.jobgen.impl.OperatorSchemaImpl) IAggregatorDescriptorFactory(org.apache.hyracks.dataflow.std.group.IAggregatorDescriptorFactory) Mutable(org.apache.commons.lang3.mutable.Mutable) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) INormalizedKeyComputerFactory(org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory) INormalizedKeyComputerFactoryProvider(org.apache.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) ILogicalPlan(org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) SortGroupByOperatorDescriptor(org.apache.hyracks.dataflow.std.group.sort.SortGroupByOperatorDescriptor)
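
One detail worth calling out: the merge expressions are compiled against a locally built schema whose field order is the group-by keys and decor variables first, followed by the variables consumed by the merge expressions, and partialAggRecordDescriptor is built from that same schema. A toy sketch of the layout (the variables are placeholders, not taken from the project, and the LogicalVariable(int) constructor is assumed):

// Sketch only: layout of the local schema the merge expressions are compiled against.
LogicalVariable gbyKey = new LogicalVariable(1);      // group-by key (keyAndDecVariables)
LogicalVariable decorVar = new LogicalVariable(2);    // decor column (keyAndDecVariables)
LogicalVariable mergeInput = new LogicalVariable(3);  // variable used by a merge expression (usedVars)

IOperatorSchema localSchema = new OperatorSchemaImpl();
localSchema.addVariable(gbyKey);      // field 0
localSchema.addVariable(decorVar);    // field 1
localSchema.addVariable(mergeInput);  // field 2, typed with the corresponding intermediate type

// The merge evaluator factories are created against this schema, so
// localSchema.findVariable(mergeInput) == 2 is the field the merge function reads.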

Aggregations

IOperatorSchema (org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema) - 7 usages
ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) - 5 usages
AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) - 4 usages
Mutable (org.apache.commons.lang3.mutable.Mutable) - 3 usages
IExpressionRuntimeProvider (org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider) - 3 usages
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor) - 3 usages
IOperatorDescriptorRegistry (org.apache.hyracks.api.job.IOperatorDescriptorRegistry) - 3 usages
ArrayList (java.util.ArrayList) - 2 usages
NotImplementedException (org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) - 2 usages
ILogicalExpression (org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) - 2 usages
ILogicalPlan (org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan) - 2 usages
LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) - 2 usages
AggregateFunctionCallExpression (org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression) - 2 usages
IPartialAggregationTypeComputer (org.apache.hyracks.algebricks.core.algebra.expressions.IPartialAggregationTypeComputer) - 2 usages
IVariableTypeEnvironment (org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) - 2 usages
VariableReferenceExpression (org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) - 2 usages
AggregateOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator) - 2 usages
GroupByOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) - 2 usages
OperatorSchemaImpl (org.apache.hyracks.algebricks.core.jobgen.impl.OperatorSchemaImpl) - 2 usages
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor) - 2 usages