Search in sources:

Example 21 with AlgebricksMetaOperatorDescriptor

Use of org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor in project asterixdb by apache.

The class SecondaryIndexOperationsHelper, method createExternalAssignOp.

protected AlgebricksMetaOperatorDescriptor createExternalAssignOp(JobSpecification spec, int numSecondaryKeys, RecordDescriptor secondaryRecDesc) throws AlgebricksException {
    int[] outColumns = new int[numSecondaryKeys];
    int[] projectionList = new int[numSecondaryKeys + numPrimaryKeys];
    for (int i = 0; i < numSecondaryKeys; i++) {
        outColumns[i] = i + numPrimaryKeys + 1;
        projectionList[i] = i + numPrimaryKeys + 1;
    }
    IScalarEvaluatorFactory[] sefs = new IScalarEvaluatorFactory[secondaryFieldAccessEvalFactories.length];
    for (int i = 0; i < secondaryFieldAccessEvalFactories.length; ++i) {
        sefs[i] = secondaryFieldAccessEvalFactories[i];
    }
    // add external RIDs to the projection list
    for (int i = 0; i < numPrimaryKeys; i++) {
        projectionList[numSecondaryKeys + i] = i + 1;
    }
    AssignRuntimeFactory assign = new AssignRuntimeFactory(outColumns, sefs, projectionList);
    return new AlgebricksMetaOperatorDescriptor(spec, 1, 1, new IPushRuntimeFactory[] { assign }, new RecordDescriptor[] { secondaryRecDesc });
}
Also used: AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor), AssignRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.AssignRuntimeFactory), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), IScalarEvaluatorFactory (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory)
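
To make the index arithmetic concrete, here is a small worked example of the arrays the method builds. The values numPrimaryKeys = 1 and numSecondaryKeys = 2 are assumed for illustration only and do not come from the source.

// Worked example of the index arithmetic above, with assumed sizes.
int numPrimaryKeys = 1;
int numSecondaryKeys = 2;
int[] outColumns = new int[numSecondaryKeys];                      // ends up as {2, 3}
int[] projectionList = new int[numSecondaryKeys + numPrimaryKeys]; // ends up as {2, 3, 1}
for (int i = 0; i < numSecondaryKeys; i++) {
    outColumns[i] = i + numPrimaryKeys + 1;      // the evaluators write to columns 2 and 3
    projectionList[i] = i + numPrimaryKeys + 1;  // and those columns are projected first
}
for (int i = 0; i < numPrimaryKeys; i++) {
    projectionList[numSecondaryKeys + i] = i + 1; // then the external RID column(s), per the source comment
}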

Example 22 with AlgebricksMetaOperatorDescriptor

Use of org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor in project asterixdb by apache.

The class SecondaryInvertedIndexOperationsHelper, method buildLoadingJobSpec.

@Override
public JobSpecification buildLoadingJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
    // Create dummy key provider for feeding the primary index scan.
    IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
    // Create primary index scan op.
    IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
    IOperatorDescriptor sourceOp = primaryScanOp;
    boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
    int numSecondaryKeys = index.getKeyFieldNames().size();
    if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
        sourceOp = createCastOp(spec, dataset.getDatasetType());
        spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
    }
    AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, numSecondaryKeys, secondaryRecDesc);
    // If any of the secondary fields are nullable, then add a select op
    // that filters nulls.
    AlgebricksMetaOperatorDescriptor selectOp = null;
    if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
        selectOp = createFilterNullsSelectOp(spec, numSecondaryKeys, secondaryRecDesc);
    }
    // Create a tokenizer op.
    AbstractOperatorDescriptor tokenizerOp = createTokenizerOp(spec);
    // Sort by token + primary keys.
    ExternalSortOperatorDescriptor sortOp = createSortOp(spec, tokenKeyPairComparatorFactories, tokenKeyPairRecDesc);
    // Create secondary inverted index bulk load op.
    AbstractSingleActivityOperatorDescriptor invIndexBulkLoadOp = createInvertedIndexBulkLoadOp(spec);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, invIndexBulkLoadOp, secondaryPartitionConstraint);
    AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] {});
    // Connect the operators.
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
    if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
        spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, tokenizerOp, 0);
    } else {
        spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, tokenizerOp, 0);
    }
    spec.connect(new OneToOneConnectorDescriptor(spec), tokenizerOp, 0, sortOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, invIndexBulkLoadOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), invIndexBulkLoadOp, 0, metaOp, 0);
    spec.addRoot(metaOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
Also used: AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor), ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor), ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), AbstractOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractOperatorDescriptor), JobId (org.apache.asterix.common.transactions.JobId), SinkRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.base.SinkRuntimeFactory)
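
The method assembles a linear pipeline: key provider, primary index scan, an optional cast, the secondary-key assign, an optional null-filtering select, the tokenizer, the sort, the inverted-index bulk load, and a final sink. The connect-then-addRoot pattern repeated above could be written once as a small helper; the sketch below is hypothetical and not part of the AsterixDB source, but it uses only calls already shown in the example.

// Hypothetical helper capturing the chaining pattern used in buildLoadingJobSpec.
static void connectPipeline(JobSpecification spec, IOperatorDescriptor... ops) {
    for (int i = 0; i + 1 < ops.length; i++) {
        // One-to-one connectors preserve the partitioning of the upstream operator.
        spec.connect(new OneToOneConnectorDescriptor(spec), ops[i], 0, ops[i + 1], 0);
    }
    // The last operator in the chain becomes the root of the job.
    spec.addRoot(ops[ops.length - 1]);
}
// With such a helper, the branch without the cast and select stages would read roughly:
// connectPipeline(spec, keyProviderOp, primaryScanOp, asterixAssignOp, tokenizerOp,
//         sortOp, invIndexBulkLoadOp, metaOp);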

Example 23 with AlgebricksMetaOperatorDescriptor

Use of org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor in project asterixdb by apache.

The class SecondaryIndexOperationsHelper, method createFilterNullsSelectOp.

public AlgebricksMetaOperatorDescriptor createFilterNullsSelectOp(JobSpecification spec, int numSecondaryKeyFields, RecordDescriptor secondaryRecDesc) throws AlgebricksException {
    IScalarEvaluatorFactory[] andArgsEvalFactories = new IScalarEvaluatorFactory[numSecondaryKeyFields];
    NotDescriptor notDesc = new NotDescriptor();
    IsUnknownDescriptor isUnknownDesc = new IsUnknownDescriptor();
    for (int i = 0; i < numSecondaryKeyFields; i++) {
        // Access column i, and apply 'is not null'.
        ColumnAccessEvalFactory columnAccessEvalFactory = new ColumnAccessEvalFactory(i);
        IScalarEvaluatorFactory isUnknownEvalFactory = isUnknownDesc.createEvaluatorFactory(new IScalarEvaluatorFactory[] { columnAccessEvalFactory });
        IScalarEvaluatorFactory notEvalFactory = notDesc.createEvaluatorFactory(new IScalarEvaluatorFactory[] { isUnknownEvalFactory });
        andArgsEvalFactories[i] = notEvalFactory;
    }
    IScalarEvaluatorFactory selectCond;
    if (numSecondaryKeyFields > 1) {
        // Create conjunctive condition where all secondary index keys must
        // satisfy 'is not null'.
        AndDescriptor andDesc = new AndDescriptor();
        selectCond = andDesc.createEvaluatorFactory(andArgsEvalFactories);
    } else {
        selectCond = andArgsEvalFactories[0];
    }
    StreamSelectRuntimeFactory select = new StreamSelectRuntimeFactory(selectCond, null, BinaryBooleanInspector.FACTORY, false, -1, null);
    AlgebricksMetaOperatorDescriptor asterixSelectOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 1, new IPushRuntimeFactory[] { select }, new RecordDescriptor[] { secondaryRecDesc });
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, asterixSelectOp, primaryPartitionConstraint);
    return asterixSelectOp;
}
Also used: AndDescriptor (org.apache.asterix.runtime.evaluators.functions.AndDescriptor), StreamSelectRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.StreamSelectRuntimeFactory), NotDescriptor (org.apache.asterix.runtime.evaluators.functions.NotDescriptor), AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor), ColumnAccessEvalFactory (org.apache.hyracks.algebricks.runtime.evaluators.ColumnAccessEvalFactory), IsUnknownDescriptor (org.apache.asterix.runtime.evaluators.functions.IsUnknownDescriptor), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), IScalarEvaluatorFactory (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory)
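
For numSecondaryKeyFields = 2, the loop above assembles the condition and(not(is-unknown(column 0)), not(is-unknown(column 1))). A fully expanded version of that case is sketched below; it is hypothetical, reuses only the factories already shown, and would sit inside a method that declares throws AlgebricksException, as above.

// Hypothetical expansion for exactly two secondary key fields.
IScalarEvaluatorFactory notNull0 = new NotDescriptor().createEvaluatorFactory(
        new IScalarEvaluatorFactory[] { new IsUnknownDescriptor().createEvaluatorFactory(
                new IScalarEvaluatorFactory[] { new ColumnAccessEvalFactory(0) }) });
IScalarEvaluatorFactory notNull1 = new NotDescriptor().createEvaluatorFactory(
        new IScalarEvaluatorFactory[] { new IsUnknownDescriptor().createEvaluatorFactory(
                new IScalarEvaluatorFactory[] { new ColumnAccessEvalFactory(1) }) });
// The select condition passes a tuple only when both secondary keys are known (not null/missing).
IScalarEvaluatorFactory selectCond = new AndDescriptor()
        .createEvaluatorFactory(new IScalarEvaluatorFactory[] { notNull0, notNull1 });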

Example 24 with AlgebricksMetaOperatorDescriptor

Use of org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor in project asterixdb by apache.

The class PushRuntimeTest, method etsUnnestRunningaggregateWrite.

@Test
public void etsUnnestRunningaggregateWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    EmptyTupleSourceRuntimeFactory ets = new EmptyTupleSourceRuntimeFactory();
    RecordDescriptor etsDesc = new RecordDescriptor(new ISerializerDeserializer[] {});
    IUnnestingEvaluatorFactory aggregFactory = new IntArrayUnnester(new int[] { 100, 200, 300 });
    UnnestRuntimeFactory unnest = new UnnestRuntimeFactory(0, aggregFactory, new int[] { 0 }, false, null);
    RecordDescriptor unnestDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    RunningAggregateRuntimeFactory ragg = new RunningAggregateRuntimeFactory(new int[] { 1 }, new IRunningAggregateEvaluatorFactory[] { new TupleCountRunningAggregateFunctionFactory() }, new int[] { 0, 1 });
    RecordDescriptor raggDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    String filePath = PATH_ACTUAL + SEPARATOR + "etsUnnestRunningaggregateWrite.out";
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 1 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, raggDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 0, 0, new IPushRuntimeFactory[] { ets, unnest, ragg, writer }, new RecordDescriptor[] { etsDesc, unnestDesc, raggDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("123", buf.toString());
    outFile.delete();
}
Also used: TupleCountRunningAggregateFunctionFactory (org.apache.hyracks.algebricks.runtime.aggregators.TupleCountRunningAggregateFunctionFactory), RunningAggregateRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.RunningAggregateRuntimeFactory), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor), IUnnestingEvaluatorFactory (org.apache.hyracks.algebricks.runtime.base.IUnnestingEvaluatorFactory), SinkWriterRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory), EmptyTupleSourceRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.EmptyTupleSourceRuntimeFactory), JobSpecification (org.apache.hyracks.api.job.JobSpecification), UnnestRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.UnnestRuntimeFactory), File (java.io.File), Test (org.junit.Test)
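
The assertion at the end follows from what each micro-runtime produces: the empty-tuple source emits a single empty tuple, the unnest expands it into (100), (200), (300), the running aggregate appends the running tuple count 1, 2, 3 as column 1, and the writer prints column 1 only. A plain-Java rendering of that computation, purely for illustration:

// Plain-Java sketch of the values flowing through the pipeline above (not part of the test).
int[] unnested = { 100, 200, 300 };   // produced by IntArrayUnnester from the one empty tuple
StringBuilder out = new StringBuilder();
for (int i = 0; i < unnested.length; i++) {
    int runningCount = i + 1;         // TupleCountRunningAggregateFunctionFactory, written to column 1
    out.append(runningCount);         // SinkWriterRuntimeFactory prints column 1 only
}
// out.toString() equals "123", matching Assert.assertEquals("123", buf.toString()) above.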

Example 25 with AlgebricksMetaOperatorDescriptor

Use of org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor in project asterixdb by apache.

The class PushRuntimeTest, method scanMicroSortWrite.

@Test
public void scanMicroSortWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    // the scanner
    FileSplit[] fileSplits = new FileSplit[1];
    fileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "nation.tbl");
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
    RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
    FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    // the algebricks op.
    InMemorySortRuntimeFactory sort = new InMemorySortRuntimeFactory(new int[] { 1 }, null, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, null);
    RecordDescriptor sortDesc = scannerDesc;
    String fileName = "scanMicroSortWrite.out";
    String filePath = PATH_ACTUAL + SEPARATOR + fileName;
    String resultFilePath = PATH_EXPECTED + SEPARATOR + fileName;
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0, 1, 2, 3 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE, UTF8StringPrinterFactory.INSTANCE, IntegerPrinterFactory.INSTANCE, UTF8StringPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, sortDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { sort, writer }, new RecordDescriptor[] { sortDesc, null });
    PartitionConstraintHelper.addPartitionCountConstraint(spec, algebricksOp, 1);
    spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, algebricksOp, 0);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    compareFiles(filePath, resultFilePath);
    outFile.delete();
}
Also used: InMemorySortRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.sort.InMemorySortRuntimeFactory), IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider), AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor), DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), FileSplit (org.apache.hyracks.api.io.FileSplit), ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit), UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), SinkWriterRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory), FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), File (java.io.File), Test (org.junit.Test)
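
Examples 24 and 25 together show the arity convention of AlgebricksMetaOperatorDescriptor: the second and third constructor arguments are the Hyracks input and output arity of the whole micro-runtime chain, and a trailing null in the record descriptor array marks a sink runtime that produces no Hyracks output. A skeleton of the one-input, zero-output variant used here is sketched below; spec, upstreamOp, firstRuntime, firstRuntimeDesc and sinkRuntime are placeholders, not names from the source.

// Hypothetical skeleton: one Hyracks input, a chain of micro-runtimes, no Hyracks output.
IPushRuntimeFactory[] runtimes = new IPushRuntimeFactory[] { firstRuntime, sinkRuntime };
RecordDescriptor[] runtimeOutputDescs = new RecordDescriptor[] { firstRuntimeDesc, null };
AlgebricksMetaOperatorDescriptor metaOp =
        new AlgebricksMetaOperatorDescriptor(spec, 1, 0, runtimes, runtimeOutputDescs);
spec.connect(new OneToOneConnectorDescriptor(spec), upstreamOp, 0, metaOp, 0);
spec.addRoot(metaOp);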

Aggregations

AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor): 27
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 19
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 16
Test (org.junit.Test): 13
File (java.io.File): 12
SinkWriterRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory): 12
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 11
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint): 9
AssignRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.AssignRuntimeFactory): 8
EmptyTupleSourceRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.EmptyTupleSourceRuntimeFactory): 8
IScalarEvaluatorFactory (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory): 7
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 7
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 7
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 7
FileSplit (org.apache.hyracks.api.io.FileSplit): 6
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 6
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 6
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 6
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 5
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 5