Example 41 with JobSpecification

Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

The class APIFramework, method compileQuery:

public JobSpecification compileQuery(IClusterInfoCollector clusterInfoCollector, MetadataProvider metadataProvider, Query rwQ, int varCounter, String outputDatasetName, SessionOutput output, ICompiledDmlStatement statement) throws AlgebricksException, RemoteException, ACIDException {
    SessionConfig conf = output.config();
    if (!conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS) && conf.is(SessionConfig.OOB_REWRITTEN_EXPR_TREE)) {
        output.out().println();
        printPlanPrefix(output, "Rewritten expression tree");
        if (rwQ != null) {
            rwQ.accept(astPrintVisitorFactory.createLangVisitor(output.out()), 0);
        }
        printPlanPostfix(output);
    }
    org.apache.asterix.common.transactions.JobId asterixJobId = JobIdFactory.generateJobId();
    metadataProvider.setJobId(asterixJobId);
    ILangExpressionToPlanTranslator t = translatorFactory.createExpressionToPlanTranslator(metadataProvider, varCounter);
    ILogicalPlan plan;
    // statement = null when it's a query
    if (statement == null || statement.getKind() != Statement.Kind.LOAD) {
        plan = t.translate(rwQ, outputDatasetName, statement);
    } else {
        plan = t.translateLoad(statement);
    }
    if (!conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS) && conf.is(SessionConfig.OOB_LOGICAL_PLAN)) {
        output.out().println();
        printPlanPrefix(output, "Logical plan");
        if (rwQ != null || (statement != null && statement.getKind() == Statement.Kind.LOAD)) {
            LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor(output.out());
            PlanPrettyPrinter.printPlan(plan, pvisitor, 0);
        }
        printPlanPostfix(output);
    }
    CompilerProperties compilerProperties = metadataProvider.getApplicationContext().getCompilerProperties();
    int frameSize = compilerProperties.getFrameSize();
    Map<String, String> querySpecificConfig = metadataProvider.getConfig();
    // Validates the user-overridden query parameters.
    validateConfig(querySpecificConfig);
    int sortFrameLimit = getFrameLimit(CompilerProperties.COMPILER_SORTMEMORY_KEY, querySpecificConfig.get(CompilerProperties.COMPILER_SORTMEMORY_KEY), compilerProperties.getSortMemorySize(), frameSize, MIN_FRAME_LIMIT_FOR_SORT);
    int groupFrameLimit = getFrameLimit(CompilerProperties.COMPILER_GROUPMEMORY_KEY, querySpecificConfig.get(CompilerProperties.COMPILER_GROUPMEMORY_KEY), compilerProperties.getGroupMemorySize(), frameSize, MIN_FRAME_LIMIT_FOR_GROUP_BY);
    int joinFrameLimit = getFrameLimit(CompilerProperties.COMPILER_JOINMEMORY_KEY, querySpecificConfig.get(CompilerProperties.COMPILER_JOINMEMORY_KEY), compilerProperties.getJoinMemorySize(), frameSize, MIN_FRAME_LIMIT_FOR_JOIN);
    OptimizationConfUtil.getPhysicalOptimizationConfig().setFrameSize(frameSize);
    OptimizationConfUtil.getPhysicalOptimizationConfig().setMaxFramesExternalSort(sortFrameLimit);
    OptimizationConfUtil.getPhysicalOptimizationConfig().setMaxFramesExternalGroupBy(groupFrameLimit);
    OptimizationConfUtil.getPhysicalOptimizationConfig().setMaxFramesForJoin(joinFrameLimit);
    HeuristicCompilerFactoryBuilder builder = new HeuristicCompilerFactoryBuilder(OptimizationContextFactory.INSTANCE);
    builder.setPhysicalOptimizationConfig(OptimizationConfUtil.getPhysicalOptimizationConfig());
    builder.setLogicalRewrites(ruleSetFactory.getLogicalRewrites(metadataProvider.getApplicationContext()));
    builder.setPhysicalRewrites(ruleSetFactory.getPhysicalRewrites(metadataProvider.getApplicationContext()));
    IDataFormat format = metadataProvider.getFormat();
    ICompilerFactory compilerFactory = builder.create();
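    // Note: create() is called before the remaining setters. The returned factory
    // appears to read the builder's settings only when createCompiler(...) is
    // invoked further down, so the setters that follow still take effect.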
    builder.setExpressionEvalSizeComputer(format.getExpressionEvalSizeComputer());
    builder.setIMergeAggregationExpressionFactory(new MergeAggregationExpressionFactory());
    builder.setPartialAggregationTypeComputer(new PartialAggregationTypeComputer());
    builder.setExpressionTypeComputer(ExpressionTypeComputer.INSTANCE);
    builder.setMissableTypeComputer(MissableTypeComputer.INSTANCE);
    builder.setConflictingTypeResolver(ConflictingTypeResolver.INSTANCE);
    int parallelism = getParallelism(querySpecificConfig.get(CompilerProperties.COMPILER_PARALLELISM_KEY), compilerProperties.getParallelism());
    AlgebricksAbsolutePartitionConstraint computationLocations = chooseLocations(clusterInfoCollector, parallelism, metadataProvider.getClusterLocations());
    builder.setClusterLocations(computationLocations);
    ICompiler compiler = compilerFactory.createCompiler(plan, metadataProvider, t.getVarCounter());
    if (conf.isOptimize()) {
        compiler.optimize();
        if (conf.is(SessionConfig.OOB_OPTIMIZED_LOGICAL_PLAN)) {
            if (conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS)) {
                // For Optimizer tests.
                AlgebricksAppendable buffer = new AlgebricksAppendable(output.out());
                PlanPrettyPrinter.printPhysicalOps(plan, buffer, 0);
            } else {
                printPlanPrefix(output, "Optimized logical plan");
                if (rwQ != null || (statement != null && statement.getKind() == Statement.Kind.LOAD)) {
                    LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor(output.out());
                    PlanPrettyPrinter.printPlan(plan, pvisitor, 0);
                }
                printPlanPostfix(output);
            }
        }
    }
    if (rwQ != null && rwQ.isExplain()) {
        try {
            LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();
            PlanPrettyPrinter.printPlan(plan, pvisitor, 0);
            ResultUtil.printResults(metadataProvider.getApplicationContext(), pvisitor.get().toString(), output, new Stats(), null);
            return null;
        } catch (IOException e) {
            throw new AlgebricksException(e);
        }
    }
    if (!conf.isGenerateJobSpec()) {
        return null;
    }
    builder.setBinaryBooleanInspectorFactory(format.getBinaryBooleanInspectorFactory());
    builder.setBinaryIntegerInspectorFactory(format.getBinaryIntegerInspectorFactory());
    builder.setComparatorFactoryProvider(format.getBinaryComparatorFactoryProvider());
    builder.setExpressionRuntimeProvider(new ExpressionRuntimeProvider(QueryLogicalExpressionJobGen.INSTANCE));
    builder.setHashFunctionFactoryProvider(format.getBinaryHashFunctionFactoryProvider());
    builder.setHashFunctionFamilyProvider(format.getBinaryHashFunctionFamilyProvider());
    builder.setMissingWriterFactory(format.getMissingWriterFactory());
    builder.setPredicateEvaluatorFactoryProvider(format.getPredicateEvaluatorFactoryProvider());
    final SessionConfig.OutputFormat outputFormat = conf.fmt();
    switch(outputFormat) {
        case LOSSLESS_JSON:
            builder.setPrinterProvider(format.getLosslessJSONPrinterFactoryProvider());
            break;
        case CSV:
            builder.setPrinterProvider(format.getCSVPrinterFactoryProvider());
            break;
        case ADM:
            builder.setPrinterProvider(format.getADMPrinterFactoryProvider());
            break;
        case CLEAN_JSON:
            builder.setPrinterProvider(format.getCleanJSONPrinterFactoryProvider());
            break;
        default:
            throw new AlgebricksException("Unexpected OutputFormat: " + outputFormat);
    }
    builder.setSerializerDeserializerProvider(format.getSerdeProvider());
    builder.setTypeTraitProvider(format.getTypeTraitProvider());
    builder.setNormalizedKeyComputerFactoryProvider(format.getNormalizedKeyComputerFactoryProvider());
    JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(asterixJobId, metadataProvider.isWriteTransaction());
    JobSpecification spec = compiler.createJob(metadataProvider.getApplicationContext(), jobEventListenerFactory);
    // When the top-level statement is a query, the statement parameter is null.
    if (statement == null) {
        // Set the required cluster capacity, but only for read-only queries;
        // DDL and DML jobs are considered infrequent enough to skip this.
        spec.setRequiredClusterCapacity(ResourceUtils.getRequiredCompacity(plan, computationLocations, sortFrameLimit, groupFrameLimit, joinFrameLimit, frameSize));
    }
    if (conf.is(SessionConfig.OOB_HYRACKS_JOB)) {
        printPlanPrefix(output, "Hyracks job");
        if (rwQ != null) {
            try {
                output.out().println(new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(spec.toJSON()));
            } catch (IOException e) {
                throw new AlgebricksException(e);
            }
            output.out().println(spec.getUserConstraints());
        }
        printPlanPostfix(output);
    }
    return spec;
}
Also used: IMergeAggregationExpressionFactory (org.apache.hyracks.algebricks.core.algebra.expressions.IMergeAggregationExpressionFactory), MergeAggregationExpressionFactory (org.apache.asterix.dataflow.data.common.MergeAggregationExpressionFactory), ICompilerFactory (org.apache.hyracks.algebricks.compiler.api.ICompilerFactory), SessionConfig (org.apache.asterix.translator.SessionConfig), ExpressionRuntimeProvider (org.apache.hyracks.algebricks.core.algebra.expressions.ExpressionRuntimeProvider), ILangExpressionToPlanTranslator (org.apache.asterix.algebra.base.ILangExpressionToPlanTranslator), IDataFormat (org.apache.asterix.formats.base.IDataFormat), JobSpecification (org.apache.hyracks.api.job.JobSpecification), HeuristicCompilerFactoryBuilder (org.apache.hyracks.algebricks.compiler.api.HeuristicCompilerFactoryBuilder), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), AlgebricksAppendable (org.apache.hyracks.algebricks.core.algebra.prettyprint.AlgebricksAppendable), LogicalOperatorPrettyPrintVisitor (org.apache.hyracks.algebricks.core.algebra.prettyprint.LogicalOperatorPrettyPrintVisitor), AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException), CompilerProperties (org.apache.asterix.common.config.CompilerProperties), ICompiler (org.apache.hyracks.algebricks.compiler.api.ICompiler), IOException (java.io.IOException), JobEventListenerFactory (org.apache.asterix.runtime.job.listener.JobEventListenerFactory), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint), PartialAggregationTypeComputer (org.apache.asterix.dataflow.data.common.PartialAggregationTypeComputer), Stats (org.apache.asterix.translator.IStatementExecutor.Stats), ILogicalPlan (org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan)
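
The three getFrameLimit calls in the method above turn a memory budget into a frame count for the sort, group-by, and join operators. The sketch below is a simplified, hypothetical version of that conversion: the name computeFrameLimit, the pre-parsed Long override, and the exception message are illustrative, and the project's actual helper additionally parses unit-suffixed override strings (e.g. "32MB") from the query config.

private static int computeFrameLimit(String parameterName, Long overrideBytes, long configuredBytes, int frameSize, int minFrameLimit) throws AlgebricksException {
    // Prefer the per-query override when present, else the cluster-wide default.
    long memBudget = overrideBytes == null ? configuredBytes : overrideBytes;
    // A frame is the unit of memory; the limit is how many frames fit in the budget.
    int frameLimit = (int) (memBudget / frameSize);
    if (frameLimit < minFrameLimit) {
        // Budgets below the operator's minimum working set are rejected.
        throw new AlgebricksException(parameterName + " must allow at least " + minFrameLimit + " frames of " + frameSize + " bytes each");
    }
    return frameLimit;
}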

Example 42 with JobSpecification

Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

The class SecondaryBTreeOperationsHelper, method buildLoadingJobSpec:

@Override
public JobSpecification buildLoadingJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
    int[] fieldPermutation = createFieldPermutationForBulkLoadOp(index.getKeyFieldNames().size());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), secondaryFileSplitProvider);
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        /*
         * In the case of external data, this method is used to build the loading
         * jobs for both the initial load on index creation and the transaction
         * load on dataset refresh.
         */
        // Create external indexing scan operator
        ExternalScanOperatorDescriptor primaryScanOp = createExternalIndexingOp(spec);
        // Assign op.
        AbstractOperatorDescriptor sourceOp = primaryScanOp;
        if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
            sourceOp = createCastOp(spec, dataset.getDatasetType());
            spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
        }
        AlgebricksMetaOperatorDescriptor asterixAssignOp = createExternalAssignOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
        // If any of the secondary fields are nullable, then add a select op that filters nulls.
        AlgebricksMetaOperatorDescriptor selectOp = null;
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            selectOp = createFilterNullsSelectOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
        }
        // Sort by secondary keys.
        ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc);
        // Create secondary BTree bulk load op.
        AbstractSingleActivityOperatorDescriptor secondaryBulkLoadOp;
        IOperatorDescriptor root;
        if (externalFiles != null) {
            // Transaction load
            secondaryBulkLoadOp = createExternalIndexBulkModifyOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        } else {
            // Initial load
            secondaryBulkLoadOp = createExternalIndexBulkLoadOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        }
        AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
        spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
        root = metaOp;
        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
            spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
        } else {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
        }
        spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
        spec.addRoot(root);
        spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
        return spec;
    } else {
        // Create dummy key provider for feeding the primary index scan.
        IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
        JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
        // Create primary index scan op.
        IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
        // Assign op.
        IOperatorDescriptor sourceOp = primaryScanOp;
        if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
            sourceOp = createCastOp(spec, dataset.getDatasetType());
            spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
        }
        AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
        // If any of the secondary fields are nullable, then add a select op that filters nulls.
        AlgebricksMetaOperatorDescriptor selectOp = null;
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            selectOp = createFilterNullsSelectOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
        }
        // Sort by secondary keys.
        ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc);
        // Create secondary BTree bulk load op.
        TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
        // Connect the operators.
        spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
            spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
        } else {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
        }
        spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
        spec.addRoot(metaOp);
        spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
        return spec;
    }
}
Also used: ExternalScanOperatorDescriptor (org.apache.asterix.external.operators.ExternalScanOperatorDescriptor), AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor), ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy), AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), AbstractOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractOperatorDescriptor), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory), JobId (org.apache.asterix.common.transactions.JobId), SinkRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.base.SinkRuntimeFactory)
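
Once built, a JobSpecification like the one returned here is handed to the Hyracks cluster controller for execution. A minimal sketch of that hand-off, assuming a reachable cluster controller; the host, port, and method name submitAndWait are placeholders. HyracksConnection and IHyracksClientConnection live in org.apache.hyracks.api.client, and the JobId below is the Hyracks one (org.apache.hyracks.api.job.JobId), not the transaction JobId used above.

public static void submitAndWait(JobSpecification spec) throws Exception {
    // Connect to the cluster controller's client port (placeholder host/port).
    IHyracksClientConnection hcc = new HyracksConnection("localhost", 1098);
    // startJob is asynchronous; waitForCompletion blocks until the job finishes.
    JobId jobId = hcc.startJob(spec);
    hcc.waitForCompletion(jobId);
}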

Example 43 with JobSpecification

Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

The class SecondaryRTreeOperationsHelper, method buildLoadingJobSpec:

@Override
public JobSpecification buildLoadingJobSpec() throws AsterixException, AlgebricksException {
    /***************************************************
         * [ About PointMBR Optimization ]
         * Instead of storing an MBR (4 doubles) for a point (2 doubles) in an
         * RTree leaf node, the PointMBR concept is introduced: a point is stored
         * as 2 doubles in the RTree leaf node, which roughly halves the RTree
         * index size. To fully benefit from the PointMBR concept, besides the
         * RTree itself, the external sort operator used during bulk loading
         * (from either data loading or index creation) must also treat a point
         * as 2 doubles instead of 4; otherwise, external sort processes twice
         * as many doubles as it actually requires. For this purpose,
         * PointMBR-specific optimization logic is added as follows:
         * 1) The CreateMBR function in the assign operator generates 2 doubles
         * instead of 4.
         * 2) The external sort operator sorts points represented with 2 doubles.
         * 3) Bulk loading in the RTree obtains the 4 doubles by reading the 2
         * stored doubles twice, and then does the same work as in the non-point
         * MBR case.
         ***************************************************/
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    int[] fieldPermutation = createFieldPermutationForBulkLoadOp(numNestedSecondaryKeyFields);
    int numNestedSecondaryKeFieldsConsideringPointMBR = isPointMBR ? numNestedSecondaryKeyFields / 2 : numNestedSecondaryKeyFields;
    RecordDescriptor secondaryRecDescConsideringPointMBR = isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc;
    boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
    IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), secondaryFileSplitProvider);
    if (dataset.getDatasetType() == DatasetType.INTERNAL) {
        // Create dummy key provider for feeding the primary index scan.
        IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
        JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
        // Create primary index scan op.
        IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
        // Assign op.
        IOperatorDescriptor sourceOp = primaryScanOp;
        if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
            sourceOp = createCastOp(spec, dataset.getDatasetType());
            spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
        }
        AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, numNestedSecondaryKeFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
        // If any of the secondary fields are nullable, then add a select op that filters nulls.
        AlgebricksMetaOperatorDescriptor selectOp = null;
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            selectOp = createFilterNullsSelectOp(spec, numNestedSecondaryKeFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
        }
        // Sort by secondary keys.
        ExternalSortOperatorDescriptor sortOp = createSortOp(spec, new IBinaryComparatorFactory[] { MetadataProvider.proposeLinearizer(keyType, secondaryComparatorFactories.length) }, isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc);
        // Create secondary RTree bulk load op.
        TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] {});
        // Connect the operators.
        spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
            spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
        } else {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
        }
        spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
        spec.addRoot(metaOp);
        spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    } else {
        // External dataset
        /*
         * In the case of external data, this method is used to build the loading
         * jobs for both the initial load on index creation and the transaction
         * load on dataset refresh.
         */
        // Create external indexing scan operator
        ExternalScanOperatorDescriptor primaryScanOp = createExternalIndexingOp(spec);
        AbstractOperatorDescriptor sourceOp = primaryScanOp;
        if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
            sourceOp = createCastOp(spec, dataset.getDatasetType());
            spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
        }
        // Assign op.
        AlgebricksMetaOperatorDescriptor asterixAssignOp = createExternalAssignOp(spec, numNestedSecondaryKeFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
        // If any of the secondary fields are nullable, then add a select op that filters nulls.
        AlgebricksMetaOperatorDescriptor selectOp = null;
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            selectOp = createFilterNullsSelectOp(spec, numNestedSecondaryKeFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
        }
        // Sort by secondary keys.
        ExternalSortOperatorDescriptor sortOp = createSortOp(spec, new IBinaryComparatorFactory[] { MetadataProvider.proposeLinearizer(keyType, secondaryComparatorFactories.length) }, isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc);
        // Create secondary RTree bulk load op.
        IOperatorDescriptor root;
        AbstractSingleActivityOperatorDescriptor secondaryBulkLoadOp;
        if (externalFiles != null) {
            // Transaction load
            secondaryBulkLoadOp = createExternalIndexBulkModifyOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        } else {
            // Initial load
            secondaryBulkLoadOp = createExternalIndexBulkLoadOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        }
        AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
        spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
        root = metaOp;
        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
            spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
        } else {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
        }
        spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
        spec.addRoot(root);
        spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    }
    return spec;
}
Also used: ExternalScanOperatorDescriptor (org.apache.asterix.external.operators.ExternalScanOperatorDescriptor), AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor), ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), AbstractOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractOperatorDescriptor), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory), JobId (org.apache.asterix.common.transactions.JobId), SinkRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.base.SinkRuntimeFactory)
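
The PointMBR comment in the method above describes the space trade-off in prose. The toy helper below, which is hypothetical and not part of AsterixDB, makes the two representations concrete: a point is stored as 2 doubles, and the bulk-load side rebuilds the 4-double MBR by reading each stored coordinate twice, since min == max for a point.

public final class PointMbrSketch {
    // Stored form: the point (x, y) as 2 doubles instead of an MBR's 4.
    public static double[] storePoint(double x, double y) {
        return new double[] { x, y };
    }

    // Bulk-load side: rebuild { xmin, ymin, xmax, ymax } from the 2 stored
    // doubles; for a point, xmin == xmax and ymin == ymax.
    public static double[] toFullMbr(double[] point) {
        return new double[] { point[0], point[1], point[0], point[1] };
    }
}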

Example 44 with JobSpecification

Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

The class SecondaryTreeIndexOperationsHelper, method buildCreationJobSpec:

@Override
public JobSpecification buildCreationJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
    IResourceFactory resourceFactory = dataset.getResourceFactory(metadataProvider, index, itemType, metaType, mergePolicyFactory, mergePolicyProperties);
    IIndexBuilderFactory indexBuilderFactory = new IndexBuilderFactory(storageComponentProvider.getStorageManager(), secondaryFileSplitProvider, resourceFactory, !dataset.isTemp());
    IndexCreateOperatorDescriptor secondaryIndexCreateOp = new IndexCreateOperatorDescriptor(spec, indexBuilderFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, secondaryIndexCreateOp, secondaryPartitionConstraint);
    spec.addRoot(secondaryIndexCreateOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
Also used: IStorageComponentProvider (org.apache.asterix.common.context.IStorageComponentProvider), ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy), IIndexBuilderFactory (org.apache.hyracks.storage.am.common.api.IIndexBuilderFactory), IndexBuilderFactory (org.apache.hyracks.storage.am.common.build.IndexBuilderFactory), JobSpecification (org.apache.hyracks.api.job.JobSpecification), IResourceFactory (org.apache.hyracks.storage.common.IResourceFactory), IndexCreateOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.IndexCreateOperatorDescriptor)
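
The creation job above is a single-operator spec; the loading jobs earlier show the full pattern: create operators, connect them, add a root. Below is a standalone sketch of that pattern, assuming a node controller named "nc1" and a tab-delimited input file (both placeholders); UTF8StringParserFactory is assumed to come from the same parsers package as IValueParserFactory, and the other classes appear in these examples and in the Aggregations list below.

public static JobSpecification buildScanAndDiscardJob() {
    JobSpecification spec = new JobSpecification();
    // Scan a delimited file on one node; the split node and path are placeholders.
    IFileSplitProvider splits = new ConstantFileSplitProvider(
            new FileSplit[] { new ManagedFileSplit("nc1", "data/input.tsv") });
    RecordDescriptor scanDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(spec, splits,
            new DelimitedDataTupleParserFactory(
                    new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, '\t'),
            scanDesc);
    // Discard tuples through the same sink pattern the loading jobs use above.
    AlgebricksMetaOperatorDescriptor sinkOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0,
            new IPushRuntimeFactory[] { new SinkRuntimeFactory() },
            new RecordDescriptor[] { scanDesc });
    spec.connect(new OneToOneConnectorDescriptor(spec), scanOp, 0, sinkOp, 0);
    spec.addRoot(sinkOp);
    return spec;
}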

Example 45 with JobSpecification

Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

The class SecondaryTreeIndexOperationsHelper, method buildCompactJobSpec:

@Override
public JobSpecification buildCompactJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, index.getIndexName());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    LSMTreeIndexCompactOperatorDescriptor compactOp = new LSMTreeIndexCompactOperatorDescriptor(spec, dataflowHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp, secondaryPartitionConstraint);
    spec.addRoot(compactOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
Also used: LSMTreeIndexCompactOperatorDescriptor (org.apache.hyracks.storage.am.lsm.common.dataflow.LSMTreeIndexCompactOperatorDescriptor), ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), JobSpecification (org.apache.hyracks.api.job.JobSpecification), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)
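
Here setPartitionConstraintInJobSpec pins the compact operator to the partitions that hold the index. For illustration only, a hedged sketch of constructing such a constraint directly; the node names "nc1" and "nc2" are placeholders, whereas in the method above the constraint (secondaryPartitionConstraint) is derived from metadata.

// Placeholder node names; in practice the constraint comes from the metadata
// provider rather than being hard-coded.
AlgebricksPartitionConstraint constraint =
        new AlgebricksAbsolutePartitionConstraint(new String[] { "nc1", "nc2" });
AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp, constraint);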

Aggregations

JobSpecification (org.apache.hyracks.api.job.JobSpecification): 182 uses
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 90 uses
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 88 uses
Test (org.junit.Test): 82 uses
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 77 uses
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 67 uses
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 61 uses
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 59 uses
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 59 uses
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 45 uses
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 45 uses
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 41 uses
FileSplit (org.apache.hyracks.api.io.FileSplit): 40 uses
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 38 uses
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 35 uses
IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory): 29 uses
MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor): 29 uses
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint): 26 uses
ResultSetId (org.apache.hyracks.api.dataset.ResultSetId): 24 uses
ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor): 23 uses