Search in sources :

Example 71 with IOperatorDescriptor

use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.

the class InvertedIndexPOperator method contributeRuntimeOperator.

/**
 * Builds the inverted-index search runtime for an unnest-map operator and registers it with the
 * Hyracks job builder.
 *
 * <p>Nothing is contributed when the unnest expression is a function call other than
 * {@code BuiltinFunctions.INDEX_SEARCH}.
 *
 * @param builder job builder that receives the operator, its partition constraint and the input edge
 * @param context job generation context; supplies the metadata provider
 * @param op logical operator to translate; cast to {@code AbstractUnnestMapOperator}
 * @param opSchema schema forwarded to the runtime builder
 * @param inputSchemas input schemas used to resolve key and filter variables to field indexes
 * @param outerPlanSchema not used by this method
 * @throws AlgebricksException for example when the dataset lookup fails with a {@code MetadataException}
 * @throws IllegalStateException if the unnest expression is not a function call
 */
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    AbstractUnnestMapOperator unnestMapOp = (AbstractUnnestMapOperator) op;
    ILogicalExpression unnestExpr = unnestMapOp.getExpressionRef().getValue();
    if (unnestExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
        throw new IllegalStateException();
    }
    AbstractFunctionCallExpression unnestFuncExpr = (AbstractFunctionCallExpression) unnestExpr;
    // Compare by value rather than by reference: an identifier that is equal to INDEX_SEARCH but is
    // not the very same instance must still be recognised. Calling equals() on the constant keeps
    // the previous "null identifier -> return" behaviour without risking a NullPointerException.
    if (!BuiltinFunctions.INDEX_SEARCH.equals(unnestFuncExpr.getFunctionIdentifier())) {
        return;
    }
    InvertedIndexJobGenParams jobGenParams = new InvertedIndexJobGenParams();
    jobGenParams.readFromFuncArgs(unnestFuncExpr.getArguments());
    MetadataProvider metadataProvider = (MetadataProvider) context.getMetadataProvider();
    Dataset dataset;
    try {
        dataset = metadataProvider.findDataset(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
    } catch (MetadataException e) {
        // Preserve the cause while surfacing the failure as the declared exception type.
        throw new AlgebricksException(e);
    }
    // Resolve search-key and filter variables to field positions in the input schemas.
    int[] keyIndexes = getKeyIndexes(jobGenParams.getKeyVarList(), inputSchemas);
    int[] minFilterFieldIndexes = getKeyIndexes(unnestMapOp.getMinFilterVars(), inputSchemas);
    int[] maxFilterFieldIndexes = getKeyIndexes(unnestMapOp.getMaxFilterVars(), inputSchemas);
    // By nature, LEFT_OUTER_UNNEST_MAP should generate null values for non-matching tuples.
    boolean retainNull = op.getOperatorTag() == LogicalOperatorTag.LEFT_OUTER_UNNEST_MAP;
    // Build runtime.
    Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> invIndexSearch = buildInvertedIndexRuntime(metadataProvider, context, builder.getJobSpec(), unnestMapOp, opSchema, jobGenParams.getRetainInput(), retainNull, jobGenParams.getDatasetName(), dataset, jobGenParams.getIndexName(), jobGenParams.getSearchKeyType(), keyIndexes, jobGenParams.getSearchModifierType(), jobGenParams.getSimilarityThreshold(), minFilterFieldIndexes, maxFilterFieldIndexes, jobGenParams.getIsFullTextSearch());
    // Contribute operator in hyracks job.
    builder.contributeHyracksOperator(unnestMapOp, invIndexSearch.first);
    builder.contributeAlgebricksPartitionConstraint(invIndexSearch.first, invIndexSearch.second);
    // Wire the operator's first input into the job graph.
    ILogicalOperator srcExchange = unnestMapOp.getInputs().get(0).getValue();
    builder.contributeGraphEdge(srcExchange, 0, unnestMapOp, 0);
}
Also used : Dataset(org.apache.asterix.metadata.entities.Dataset) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) MetadataException(org.apache.asterix.metadata.MetadataException) InvertedIndexJobGenParams(org.apache.asterix.optimizer.rules.am.InvertedIndexJobGenParams) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) AbstractUnnestMapOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractUnnestMapOperator) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)

Example 72 with IOperatorDescriptor

use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.

the class RTreeSearchPOperator method contributeRuntimeOperator.

/**
 * Translates an R-tree index-search unnest-map operator into a Hyracks operator and adds it,
 * its partition constraint and its input edge to the job builder.
 *
 * <p>Returns without contributing anything when the unnest expression is a function call other
 * than {@code BuiltinFunctions.INDEX_SEARCH}.
 *
 * @param builder job builder receiving the generated operator
 * @param context job generation context; supplies the metadata provider and type environment
 * @param op logical operator to translate; cast to {@code AbstractUnnestMapOperator}
 * @param opSchema schema forwarded to the runtime builder
 * @param inputSchemas input schemas used to resolve key and filter variables to field indexes
 * @param outerPlanSchema not used by this method
 * @throws AlgebricksException propagated from the calls made while building the runtime
 * @throws IllegalStateException if the unnest expression is not a function call
 */
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    final AbstractUnnestMapOperator searchOp = (AbstractUnnestMapOperator) op;
    final ILogicalExpression searchExpr = searchOp.getExpressionRef().getValue();
    if (searchExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
        throw new IllegalStateException();
    }
    final AbstractFunctionCallExpression searchCall = (AbstractFunctionCallExpression) searchExpr;
    if (!searchCall.getFunctionIdentifier().equals(BuiltinFunctions.INDEX_SEARCH)) {
        // Not an index search: nothing for this operator to contribute.
        return;
    }

    final RTreeJobGenParams params = new RTreeJobGenParams();
    params.readFromFuncArgs(searchCall.getArguments());

    // Field positions of the search keys and of the min/max filter variables.
    final int[] searchKeyFields = getKeyIndexes(params.getKeyVarList(), inputSchemas);
    final int[] minFilterFields = getKeyIndexes(searchOp.getMinFilterVars(), inputSchemas);
    final int[] maxFilterFields = getKeyIndexes(searchOp.getMaxFilterVars(), inputSchemas);

    final MetadataProvider provider = (MetadataProvider) context.getMetadataProvider();
    final Dataset targetDataset = provider.findDataset(params.getDataverseName(), params.getDatasetName());
    final IVariableTypeEnvironment env = context.getTypeEnvironment(searchOp);

    // Start from the operator's own variables; when the input is retained, use the live variables instead.
    List<LogicalVariable> producedVars = searchOp.getVariables();
    if (params.getRetainInput()) {
        producedVars = new ArrayList<>();
        VariableUtilities.getLiveVariables(searchOp, producedVars);
    }

    // By nature, LEFT_OUTER_UNNEST_MAP should generate null values for non-matching tuples.
    final boolean keepNonMatching = op.getOperatorTag() == LogicalOperatorTag.LEFT_OUTER_UNNEST_MAP;

    final Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> runtime = provider.buildRtreeRuntime(builder.getJobSpec(), producedVars, opSchema, env, context, params.getRetainInput(), keepNonMatching, targetDataset, params.getIndexName(), searchKeyFields, minFilterFields, maxFilterFields);
    builder.contributeHyracksOperator(searchOp, runtime.first);
    builder.contributeAlgebricksPartitionConstraint(runtime.first, runtime.second);

    // Connect the operator's first input to it in the job graph.
    final ILogicalOperator inputOp = searchOp.getInputs().get(0).getValue();
    builder.contributeGraphEdge(inputOp, 0, searchOp, 0);
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) Dataset(org.apache.asterix.metadata.entities.Dataset) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) FunctionIdentifier(org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) RTreeJobGenParams(org.apache.asterix.optimizer.rules.am.RTreeJobGenParams) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) AbstractUnnestMapOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractUnnestMapOperator) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment)

Example 73 with IOperatorDescriptor

use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.

the class BTreeSearchPOperator method contributeRuntimeOperator.

/**
 * Translates a B-tree index-search unnest-map operator into a Hyracks operator and adds it,
 * its partition constraint and its input edge to the job builder.
 *
 * <p>Returns without contributing anything when the unnest expression is a function call other
 * than {@code BuiltinFunctions.INDEX_SEARCH}.
 *
 * @param builder job builder receiving the generated operator
 * @param context job generation context; supplies the metadata provider and type environment
 * @param op logical operator to translate; cast to {@code AbstractUnnestMapOperator}
 * @param opSchema schema forwarded to the runtime builder
 * @param inputSchemas input schemas used to resolve key and filter variables to field indexes
 * @param outerPlanSchema not used by this method
 * @throws AlgebricksException propagated from the calls made while building the runtime
 * @throws IllegalStateException if the unnest expression is not a function call
 */
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    final AbstractUnnestMapOperator searchOp = (AbstractUnnestMapOperator) op;
    final ILogicalExpression searchExpr = searchOp.getExpressionRef().getValue();
    if (searchExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
        throw new IllegalStateException();
    }
    final AbstractFunctionCallExpression searchCall = (AbstractFunctionCallExpression) searchExpr;
    if (!searchCall.getFunctionIdentifier().equals(BuiltinFunctions.INDEX_SEARCH)) {
        // Not an index search: nothing for this operator to contribute.
        return;
    }

    final BTreeJobGenParams params = new BTreeJobGenParams();
    params.readFromFuncArgs(searchCall.getArguments());

    // Field positions of the low/high search keys and of the min/max filter variables.
    final int[] lowKeyFields = getKeyIndexes(params.getLowKeyVarList(), inputSchemas);
    final int[] highKeyFields = getKeyIndexes(params.getHighKeyVarList(), inputSchemas);
    final int[] minFilterFields = getKeyIndexes(searchOp.getMinFilterVars(), inputSchemas);
    final int[] maxFilterFields = getKeyIndexes(searchOp.getMaxFilterVars(), inputSchemas);

    final MetadataProvider provider = (MetadataProvider) context.getMetadataProvider();
    final Dataset targetDataset = provider.findDataset(params.getDataverseName(), params.getDatasetName());
    final IVariableTypeEnvironment env = context.getTypeEnvironment(op);

    // Only the left-outer variant of the operator sets this flag, so that tuples without a
    // match are not dropped.
    final boolean keepNonMatching = op.getOperatorTag() == LogicalOperatorTag.LEFT_OUTER_UNNEST_MAP;

    final Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> runtime = provider.buildBtreeRuntime(builder.getJobSpec(), opSchema, env, context, params.getRetainInput(), keepNonMatching, targetDataset, params.getIndexName(), lowKeyFields, highKeyFields, params.isLowKeyInclusive(), params.isHighKeyInclusive(), minFilterFields, maxFilterFields);
    builder.contributeHyracksOperator(searchOp, runtime.first);
    builder.contributeAlgebricksPartitionConstraint(runtime.first, runtime.second);

    // Connect the operator's first input to it in the job graph.
    final ILogicalOperator inputOp = searchOp.getInputs().get(0).getValue();
    builder.contributeGraphEdge(inputOp, 0, searchOp, 0);
}
Also used : Dataset(org.apache.asterix.metadata.entities.Dataset) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) BTreeJobGenParams(org.apache.asterix.optimizer.rules.am.BTreeJobGenParams) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) FunctionIdentifier(org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) AbstractUnnestMapOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractUnnestMapOperator) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment)

Example 74 with IOperatorDescriptor

use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.

the class ExternalDataLookupPOperator method contributeRuntimeOperator.

/**
 * Translates an external-data lookup unnest-map operator into a Hyracks operator and adds it,
 * its partition constraint and its input edge to the job builder.
 *
 * <p>Returns without contributing anything when the unnest expression is a function call other
 * than {@code BuiltinFunctions.EXTERNAL_LOOKUP}. The values {@code ridVarList}, {@code dataset},
 * {@code retainInput} and {@code retainMissing} are defined outside this method.
 *
 * @param builder job builder receiving the generated operator
 * @param context job generation context; supplies the metadata provider and type environment
 * @param op logical operator to translate; cast to {@code UnnestMapOperator}
 * @param opSchema schema forwarded to the runtime builder
 * @param inputSchemas input schemas used to resolve the record-id variables to field indexes
 * @param outerPlanSchema not used by this method
 * @throws AlgebricksException propagated from the calls made while building the runtime
 * @throws IllegalStateException if the unnest expression is not a function call
 */
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    final UnnestMapOperator lookupOp = (UnnestMapOperator) op;
    final ILogicalExpression lookupExpr = lookupOp.getExpressionRef().getValue();
    if (lookupExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
        throw new IllegalStateException();
    }
    final AbstractFunctionCallExpression lookupCall = (AbstractFunctionCallExpression) lookupExpr;
    if (!lookupCall.getFunctionIdentifier().equals(BuiltinFunctions.EXTERNAL_LOOKUP)) {
        // Not an external lookup: nothing for this operator to contribute.
        return;
    }

    final int[] ridFields = getKeyIndexes(ridVarList, inputSchemas);
    final IVariableTypeEnvironment env = context.getTypeEnvironment(op);
    final MetadataProvider provider = (MetadataProvider) context.getMetadataProvider();

    final Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> externalLookup = provider.buildExternalDataLookupRuntime(builder.getJobSpec(), dataset, ridFields, retainInput, env, opSchema, context, provider, retainMissing);
    builder.contributeHyracksOperator(lookupOp, externalLookup.first);
    builder.contributeAlgebricksPartitionConstraint(externalLookup.first, externalLookup.second);

    // Connect the operator's first input to it in the job graph.
    final ILogicalOperator inputOp = lookupOp.getInputs().get(0).getValue();
    builder.contributeGraphEdge(inputOp, 0, lookupOp, 0);
}
Also used : FunctionIdentifier(org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) UnnestMapOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment)

Example 75 with IOperatorDescriptor

use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.

the class JoinMultiComparator method generateOptimizedHashJoinRuntime.

/**
 * Creates the {@code OptimizedHybridHashJoinOperatorDescriptor} matching this operator's join
 * {@code kind}.
 *
 * <p>For {@code LEFT_OUTER} joins, one missing-writer factory per field of the second input
 * schema is passed to the descriptor.
 *
 * @param context job generation context; supplies the missing-writer factory
 * @param inputSchemas input schemas; index 1 determines the number of missing writers
 * @param keysLeft join key fields forwarded as the left-hand keys
 * @param keysRight join key fields forwarded as the right-hand keys
 * @param hashFunFamilies hash function families forwarded to the descriptor
 * @param comparatorFactories comparator factories forwarded to the descriptor and used to build
 *        the two {@code JoinMultiComparatorFactory} instances
 * @param predEvaluatorFactory predicate evaluator factory forwarded to the descriptor
 * @param recDescriptor record descriptor forwarded to the descriptor
 * @param spec operator descriptor registry the new descriptor is created against
 * @return the join operator descriptor
 * @throws AlgebricksException wrapping any {@code HyracksDataException} raised during construction
 * @throws NotImplementedException for join kinds other than {@code INNER} and {@code LEFT_OUTER}
 */
private IOperatorDescriptor generateOptimizedHashJoinRuntime(JobGenContext context, IOperatorSchema[] inputSchemas, int[] keysLeft, int[] keysRight, IBinaryHashFunctionFamily[] hashFunFamilies, IBinaryComparatorFactory[] comparatorFactories, IPredicateEvaluatorFactory predEvaluatorFactory, RecordDescriptor recDescriptor, IOperatorDescriptorRegistry spec) throws AlgebricksException {
    try {
        switch(kind) {
            case INNER:
                return new OptimizedHybridHashJoinOperatorDescriptor(spec, getMemSizeInFrames(), maxInputBuildSizeInFrames, getFudgeFactor(), keysLeft, keysRight, hashFunFamilies, comparatorFactories, recDescriptor, new JoinMultiComparatorFactory(comparatorFactories, keysLeft, keysRight), new JoinMultiComparatorFactory(comparatorFactories, keysRight, keysLeft), predEvaluatorFactory);
            case LEFT_OUTER: {
                // One missing-writer factory per field of the second input.
                final IMissingWriterFactory[] missingWriters = new IMissingWriterFactory[inputSchemas[1].getSize()];
                for (int field = 0; field < missingWriters.length; field++) {
                    missingWriters[field] = context.getMissingWriterFactory();
                }
                return new OptimizedHybridHashJoinOperatorDescriptor(spec, getMemSizeInFrames(), maxInputBuildSizeInFrames, getFudgeFactor(), keysLeft, keysRight, hashFunFamilies, comparatorFactories, recDescriptor, new JoinMultiComparatorFactory(comparatorFactories, keysLeft, keysRight), new JoinMultiComparatorFactory(comparatorFactories, keysRight, keysLeft), predEvaluatorFactory, true, missingWriters);
            }
            default:
                throw new NotImplementedException();
        }
    } catch (HyracksDataException e) {
        throw new AlgebricksException(e);
    }
}
Also used : OptimizedHybridHashJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) IMissingWriterFactory(org.apache.hyracks.api.dataflow.value.IMissingWriterFactory) NotImplementedException(org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Aggregations

IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 89; JobSpecification (org.apache.hyracks.api.job.JobSpecification): 61; RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 52; IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 51; OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 48; ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 48; Test (org.junit.Test): 41; UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 37; IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 34; FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 34; DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 33; ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 30; FileSplit (org.apache.hyracks.api.io.FileSplit): 28; AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint): 26; IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 26; ResultSetId (org.apache.hyracks.api.dataset.ResultSetId): 24; ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor): 23; ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator): 19; PlainFileWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor): 19; FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 18