Search in sources :

Example 21 with AbstractSingleActivityOperatorDescriptor

use of org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor in project asterixdb by apache.

the class SecondaryInvertedIndexOperationsHelper method buildLoadingJobSpec.

@Override
public JobSpecification buildLoadingJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
    // Create dummy key provider for feeding the primary index scan.
    IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
    // Create primary index scan op.
    IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
    IOperatorDescriptor sourceOp = primaryScanOp;
    boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
    int numSecondaryKeys = index.getKeyFieldNames().size();
    if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
        sourceOp = createCastOp(spec, dataset.getDatasetType());
        spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
    }
    AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, numSecondaryKeys, secondaryRecDesc);
    // If any of the secondary fields are nullable, then add a select op
    // that filters nulls.
    AlgebricksMetaOperatorDescriptor selectOp = null;
    if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
        selectOp = createFilterNullsSelectOp(spec, numSecondaryKeys, secondaryRecDesc);
    }
    // Create a tokenizer op.
    AbstractOperatorDescriptor tokenizerOp = createTokenizerOp(spec);
    // Sort by token + primary keys.
    ExternalSortOperatorDescriptor sortOp = createSortOp(spec, tokenKeyPairComparatorFactories, tokenKeyPairRecDesc);
    // Create secondary inverted index bulk load op.
    AbstractSingleActivityOperatorDescriptor invIndexBulkLoadOp = createInvertedIndexBulkLoadOp(spec);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, invIndexBulkLoadOp, secondaryPartitionConstraint);
    AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] {});
    // Connect the operators.
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
    if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
        spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, tokenizerOp, 0);
    } else {
        spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, tokenizerOp, 0);
    }
    spec.connect(new OneToOneConnectorDescriptor(spec), tokenizerOp, 0, sortOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, invIndexBulkLoadOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), invIndexBulkLoadOp, 0, metaOp, 0);
    spec.addRoot(metaOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
Also used : AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) AbstractOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractOperatorDescriptor) JobId(org.apache.asterix.common.transactions.JobId) SinkRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.base.SinkRuntimeFactory)

Aggregations

AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor)21 JobSpecification (org.apache.hyracks.api.job.JobSpecification)19 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)19 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)16 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)16 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)15 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)15 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)15 IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory)15 IntSumFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory)15 MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)15 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)14 Test (org.junit.Test)14 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)13 HashSpillableTableFactory (org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory)9 ExternalGroupOperatorDescriptor (org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor)9 IBinaryHashFunctionFamily (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily)8 UTF8StringNormalizedKeyComputerFactory (org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory)8 PreclusteredGroupOperatorDescriptor (org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor)6 CountFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory)5