
Example 86 with OneToOneConnectorDescriptor

Use of org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor in project asterixdb by Apache.

From the class RTreeSecondaryIndexStatsOperatorTest, method showPrimaryIndexStats (despite its name, the method gathers statistics over the secondary index via secondaryHelperFactory):

@Test
public void showPrimaryIndexStats() throws Exception {
    JobSpecification spec = new JobSpecification();
    // Operator that walks the secondary index and emits its statistics.
    TreeIndexStatsOperatorDescriptor secondaryStatsOp = new TreeIndexStatsOperatorDescriptor(spec, storageManager, secondaryHelperFactory);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryStatsOp, NC1_ID);
    // Write the stats, comma-delimited, to a file on node NC1.
    IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    // One-to-one connector: stats partition i feeds printer partition i directly.
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryStatsOp, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used:
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)
PlainFileWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor)
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)
TreeIndexStatsOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexStatsOperatorDescriptor)
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)
JobSpecification (org.apache.hyracks.api.job.JobSpecification)
Test (org.junit.Test)
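All of these examples repeat the same wiring idiom: build a JobSpecification, create operators, then call spec.connect with a connector, the producer and its output port, and the consumer and its input port. A OneToOneConnectorDescriptor simply pipes partition i of the producer to partition i of the consumer, with no repartitioning. As a minimal sketch of that idiom (the chain helper is hypothetical, not part of Hyracks; it only composes the calls already shown above):

import org.apache.hyracks.api.dataflow.IOperatorDescriptor;
import org.apache.hyracks.api.job.JobSpecification;
import org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;

public final class PipelineWiring {

    private PipelineWiring() {
    }

    // Chains ops[0] -> ops[1] -> ... pairwise on port 0 and roots the last operator.
    public static void chain(JobSpecification spec, IOperatorDescriptor... ops) {
        if (ops.length == 0) {
            return;
        }
        for (int i = 0; i + 1 < ops.length; i++) {
            // Each one-to-one connector forwards partition i of the producer
            // directly to partition i of the consumer; no repartitioning occurs.
            spec.connect(new OneToOneConnectorDescriptor(spec), ops[i], 0, ops[i + 1], 0);
        }
        spec.addRoot(ops[ops.length - 1]);
    }
}

With such a helper, example 86 would collapse to chain(spec, secondaryStatsOp, printer) followed by runTest(spec).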

Example 87 with OneToOneConnectorDescriptor

Use of org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor in project asterixdb by Apache.

From the class JobFailureTest, method execTest:

private void execTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    // Source operator that, per its name, throws while creating its push runtime,
    // exercising the cluster's job-failure handling.
    AbstractSingleActivityOperatorDescriptor sourceOpDesc = new ExceptionOnCreatePushRuntimeOperatorDescriptor(spec, 0, 1, new int[] { 4 }, true);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sourceOpDesc, ASTERIX_IDS);
    SinkOperatorDescriptor sinkOpDesc = new SinkOperatorDescriptor(spec, 1);
    // Pin source and sink to the same node list: a one-to-one connector
    // requires matching partition counts on both sides.
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sinkOpDesc, ASTERIX_IDS);
    IConnectorDescriptor conn = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn, sourceOpDesc, 0, sinkOpDesc, 0);
    spec.addRoot(sinkOpDesc);
    try {
        runTest(spec);
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    }
    // Assert that every partition reported success, using the collected
    // per-partition stats as the failure message.
    Assert.assertTrue(ExceptionOnCreatePushRuntimeOperatorDescriptor.stats() + ExceptionOnCreatePushRuntimeOperatorDescriptor.succeed(), ExceptionOnCreatePushRuntimeOperatorDescriptor.succeed());
    // should also check the content of the different NCs
}
Also used:
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)
AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor)
ExceptionOnCreatePushRuntimeOperatorDescriptor (org.apache.hyracks.tests.util.ExceptionOnCreatePushRuntimeOperatorDescriptor)
SinkOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.SinkOperatorDescriptor)
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)
JobSpecification (org.apache.hyracks.api.job.JobSpecification)
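Example 87 highlights the constraint a one-to-one connector imposes: since it cannot repartition, producer and consumer must run with the same number of partitions, which is why both operators above are pinned to the identical ASTERIX_IDS node list. A hedged sketch of that pairing (the helper class is illustrative, not Hyracks API; the import path for PartitionConstraintHelper is given from memory, while its call signature matches the examples above):

import org.apache.hyracks.api.constraints.PartitionConstraintHelper;
import org.apache.hyracks.api.dataflow.IOperatorDescriptor;
import org.apache.hyracks.api.job.JobSpecification;
import org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;

public final class SymmetricWiring {

    private SymmetricWiring() {
    }

    // Pins src and dst to the same nodes, then joins them one-to-one.
    public static void connectOneToOne(JobSpecification spec, IOperatorDescriptor src,
            IOperatorDescriptor dst, String... nodeIds) {
        // Same node list on both sides => same partition count,
        // as the one-to-one connector requires.
        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, src, nodeIds);
        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, dst, nodeIds);
        spec.connect(new OneToOneConnectorDescriptor(spec), src, 0, dst, 0);
    }
}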

Example 88 with OneToOneConnectorDescriptor

Use of org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor in project asterixdb by Apache.

From the class SecondaryInvertedIndexOperationsHelper, method buildLoadingJobSpec:

@Override
public JobSpecification buildLoadingJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
    // Create dummy key provider for feeding the primary index scan.
    IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
    // Create primary index scan op.
    IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
    IOperatorDescriptor sourceOp = primaryScanOp;
    // Note: the method name is misspelled ("Fileds") in the upstream Index API.
    boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
    int numSecondaryKeys = index.getKeyFieldNames().size();
    if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
        sourceOp = createCastOp(spec, dataset.getDatasetType());
        spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
    }
    AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, numSecondaryKeys, secondaryRecDesc);
    // If any of the secondary fields are nullable, then add a select op
    // that filters nulls.
    AlgebricksMetaOperatorDescriptor selectOp = null;
    if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
        selectOp = createFilterNullsSelectOp(spec, numSecondaryKeys, secondaryRecDesc);
    }
    // Create a tokenizer op.
    AbstractOperatorDescriptor tokenizerOp = createTokenizerOp(spec);
    // Sort by token + primary keys.
    ExternalSortOperatorDescriptor sortOp = createSortOp(spec, tokenKeyPairComparatorFactories, tokenKeyPairRecDesc);
    // Create secondary inverted index bulk load op.
    AbstractSingleActivityOperatorDescriptor invIndexBulkLoadOp = createInvertedIndexBulkLoadOp(spec);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, invIndexBulkLoadOp, secondaryPartitionConstraint);
    // Terminal operator: a meta operator wrapping a sink runtime to close the pipeline.
    AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] {});
    // Connect the operators.
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
    if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
        spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, tokenizerOp, 0);
    } else {
        spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, tokenizerOp, 0);
    }
    spec.connect(new OneToOneConnectorDescriptor(spec), tokenizerOp, 0, sortOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, invIndexBulkLoadOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), invIndexBulkLoadOp, 0, metaOp, 0);
    spec.addRoot(metaOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
Also used:
AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor)
ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy)
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)
AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor)
ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor)
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)
JobSpecification (org.apache.hyracks.api.job.JobSpecification)
AbstractOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractOperatorDescriptor)
JobId (org.apache.asterix.common.transactions.JobId)
SinkRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.base.SinkRuntimeFactory)
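Example 88 is the most involved of the three: every stage of the secondary inverted-index bulk-load pipeline is joined by a OneToOneConnectorDescriptor, with two stages spliced in only when needed. Reading the connect calls in order, the dataflow is:

keyProviderOp -> primaryScanOp -> [castOp] -> asterixAssignOp -> [selectOp] -> tokenizerOp -> sortOp -> invIndexBulkLoadOp -> metaOp (root)

Bracketed stages are optional: the cast is added when key types are enforced and the enforced type differs from the item type, and the null-filtering select when any secondary key is nullable or key types are enforced.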

Aggregations

Type (fully qualified name): usage count
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 88
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 88
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 72
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 62
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 58
Test (org.junit.Test): 58
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 54
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 54
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 48
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 40
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 39
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 36
FileSplit (org.apache.hyracks.api.io.FileSplit): 33
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 33
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 29
IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory): 24
MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor): 24
PlainFileWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor): 20
MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory): 20
ResultSetId (org.apache.hyracks.api.dataset.ResultSetId): 19