
Example 21 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in the asterixdb project by Apache.

The class DatasetUtil, method createDummyKeyProviderOp:

/**
     * Creates a dummy key provider operator for the primary index scan.
     *
     * @param spec
     *            the job specification.
     * @param dataset
     *            the dataset to scan.
     * @param metadataProvider
     *            the metadata provider.
     * @return a dummy key provider operator.
     * @throws AlgebricksException
     */
public static IOperatorDescriptor createDummyKeyProviderOp(JobSpecification spec, Dataset dataset, MetadataProvider metadataProvider) throws AlgebricksException {
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset);
    AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
    // Build dummy tuple containing one field with a dummy value inside.
    ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    try {
        // Serialize dummy value into a field.
        IntegerSerializerDeserializer.INSTANCE.serialize(0, dos);
    } catch (HyracksDataException e) {
        throw new AsterixException(e);
    }
    // Add dummy field.
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { IntegerSerializerDeserializer.INSTANCE };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, keyProviderOp, primaryPartitionConstraint);
    return keyProviderOp;
}
Also used: DataOutput (java.io.DataOutput), ConstantTupleSourceOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor), AsterixException (org.apache.asterix.common.exceptions.AsterixException), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException), ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)

Example 22 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in the asterixdb project by Apache.

The class PushRuntimeTest, method scanSelectWrite:

@Test
public void scanSelectWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    // the scanner
    FileSplit[] intFileSplits = new FileSplit[1];
    intFileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, "data" + File.separator + "simple" + File.separator + "int-part1.tbl");
    IFileSplitProvider intSplitProvider = new ConstantFileSplitProvider(intFileSplits);
    RecordDescriptor intScannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE };
    FileScanOperatorDescriptor intScanner = new FileScanOperatorDescriptor(spec, intSplitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), intScannerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, intScanner, DEFAULT_NODES);
    // the algebricks op.
    IScalarEvaluatorFactory cond = new IntegerGreaterThanEvalFactory(new IntegerConstantEvalFactory(2), new TupleFieldEvaluatorFactory(0));
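    // The select runtime keeps the tuples that satisfy the integer greater-than condition
    // (built from the constant 2 and tuple field 0) and projects field 0.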
    StreamSelectRuntimeFactory select = new StreamSelectRuntimeFactory(cond, new int[] { 0 }, BinaryBooleanInspectorImpl.FACTORY, false, -1, null);
    RecordDescriptor selectDesc = intScannerDesc;
    String filePath = PATH_ACTUAL + SEPARATOR + "scanSelectWrite.out";
    File outFile = new File(filePath);
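    // Write field 0 of each surviving tuple to scanSelectWrite.out with an integer printer.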
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, selectDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { select, writer }, new RecordDescriptor[] { selectDesc, null });
    PartitionConstraintHelper.addPartitionCountConstraint(spec, algebricksOp, 1);
    spec.connect(new OneToOneConnectorDescriptor(spec), intScanner, 0, algebricksOp, 0);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("0", buf.toString());
    outFile.delete();
}
Also used: TupleFieldEvaluatorFactory (org.apache.hyracks.algebricks.runtime.evaluators.TupleFieldEvaluatorFactory), IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider), AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor), DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), FileSplit (org.apache.hyracks.api.io.FileSplit), ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit), IScalarEvaluatorFactory (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory), StreamSelectRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.StreamSelectRuntimeFactory), SinkWriterRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory), FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), File (java.io.File), Test (org.junit.Test)

Example 23 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in the asterixdb project by Apache.

The class PushRuntimeTest, method etsAssignPrint:

@Test
public void etsAssignPrint() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
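    // Two constant evaluators producing the integer values 400 and 3.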
    IntegerConstantEvalFactory const1 = new IntegerConstantEvalFactory(400);
    IntegerConstantEvalFactory const2 = new IntegerConstantEvalFactory(3);
    EmptyTupleSourceRuntimeFactory ets = new EmptyTupleSourceRuntimeFactory();
    RecordDescriptor etsDesc = new RecordDescriptor(new ISerializerDeserializer[] {});
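    // Assign writes the two constants into fields 0 and 1; the output schema is two integers.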
    AssignRuntimeFactory assign = new AssignRuntimeFactory(new int[] { 0, 1 }, new IScalarEvaluatorFactory[] { const1, const2 }, new int[] { 0, 1 });
    RecordDescriptor assignDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    PrinterRuntimeFactory printer = new PrinterRuntimeFactory(new int[] { 0, 1 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE, IntegerPrinterFactory.INSTANCE }, assignDesc);
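    // Chain the empty-tuple source, assign, and printer into one micro-operator;
    // the last record descriptor is null because the printer runtime is a sink.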
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 0, 0, new IPushRuntimeFactory[] { ets, assign, printer }, new RecordDescriptor[] { etsDesc, assignDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp, DEFAULT_NODES);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
}
Also used: PrinterRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.PrinterRuntimeFactory), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), EmptyTupleSourceRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.EmptyTupleSourceRuntimeFactory), AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), AssignRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.AssignRuntimeFactory), Test (org.junit.Test)

Example 24 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in the asterixdb project by Apache.

The class PushRuntimeTest, method etsAssignWrite:

@Test
public void etsAssignWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
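    // Same constant-assign pipeline as etsAssignPrint above, but the result is written to a file instead of printed.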
    IntegerConstantEvalFactory const1 = new IntegerConstantEvalFactory(400);
    IntegerConstantEvalFactory const2 = new IntegerConstantEvalFactory(3);
    EmptyTupleSourceRuntimeFactory ets = new EmptyTupleSourceRuntimeFactory();
    RecordDescriptor etsDesc = new RecordDescriptor(new ISerializerDeserializer[] {});
    AssignRuntimeFactory assign = new AssignRuntimeFactory(new int[] { 0, 1 }, new IScalarEvaluatorFactory[] { const1, const2 }, new int[] { 0, 1 });
    RecordDescriptor assignDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    String filePath = PATH_ACTUAL + SEPARATOR + "etsAssignWrite.out";
    File outFile = new File(filePath);
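    // The sink writer prints fields 0 and 1 of each tuple into etsAssignWrite.out; the expected content is asserted below.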
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0, 1 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE, IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, assignDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 0, 0, new IPushRuntimeFactory[] { ets, assign, writer }, new RecordDescriptor[] { etsDesc, assignDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp, DEFAULT_NODES);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("400; 3", buf.toString());
    outFile.delete();
}
Also used: SinkWriterRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), EmptyTupleSourceRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.EmptyTupleSourceRuntimeFactory), AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), AssignRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.AssignRuntimeFactory), File (java.io.File), Test (org.junit.Test)

Example 25 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in the asterixdb project by Apache.

The class PushRuntimeTest, method scanSplitWrite:

@Test
public void scanSplitWrite() throws Exception {
    final int outputArity = 2;
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
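    // inputFileName[0] is the file that gets scanned; the two *-split-*.tbl files hold the
    // expected contents of the split outputs and are compared against the actual output at the end.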
    String[] inputFileName = { "data" + File.separator + "simple" + File.separator + "int-string-part1.tbl", "data" + File.separator + "simple" + File.separator + "int-string-part1-split-0.tbl", "data" + File.separator + "simple" + File.separator + "int-string-part1-split-1.tbl" };
    File[] inputFiles = new File[inputFileName.length];
    for (int i = 0; i < inputFileName.length; i++) {
        inputFiles[i] = new File(inputFileName[i]);
    }
    File[] outputFile = new File[outputArity];
    FileSplit[] outputFileSplit = new FileSplit[outputArity];
    for (int i = 0; i < outputArity; i++) {
        outputFileSplit[i] = createFile(AlgebricksHyracksIntegrationUtil.nc1);
        outputFile[i] = outputFileSplit[i].getFile(AlgebricksHyracksIntegrationUtil.nc1.getIoManager());
    }
    FileSplit[] inputSplits = new FileSplit[] { new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, inputFileName[0]) };
    IFileSplitProvider intSplitProvider = new ConstantFileSplitProvider(inputSplits);
    RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
    FileScanOperatorDescriptor intScanner = new FileScanOperatorDescriptor(spec, intSplitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, intScanner, DEFAULT_NODES);
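    // The split operator routes each incoming tuple to one of the two outputs based on the integer value of field 0.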
    SplitOperatorDescriptor splitOp = new SplitOperatorDescriptor(spec, scannerDesc, outputArity, new TupleFieldEvaluatorFactory(0), BinaryIntegerInspectorImpl.FACTORY);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, splitOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    IOperatorDescriptor[] outputOp = new IOperatorDescriptor[outputFile.length];
    for (int i = 0; i < outputArity; i++) {
        outputOp[i] = new LineFileWriteOperatorDescriptor(spec, new FileSplit[] { outputFileSplit[i] });
        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, outputOp[i], new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    }
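    // Wire scanner -> split, then connect split output i to file writer i.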
    spec.connect(new OneToOneConnectorDescriptor(spec), intScanner, 0, splitOp, 0);
    for (int i = 0; i < outputArity; i++) {
        spec.connect(new OneToOneConnectorDescriptor(spec), splitOp, i, outputOp[i], 0);
    }
    for (int i = 0; i < outputArity; i++) {
        spec.addRoot(outputOp[i]);
    }
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    for (int i = 0; i < outputArity; i++) {
        compareFiles("data" + File.separator + "device0" + File.separator + inputFileName[i + 1], outputFile[i].getAbsolutePath());
    }
}
Also used: TupleFieldEvaluatorFactory (org.apache.hyracks.algebricks.runtime.evaluators.TupleFieldEvaluatorFactory), IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory), SplitOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.std.SplitOperatorDescriptor), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider), DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), FileSplit (org.apache.hyracks.api.io.FileSplit), ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit), UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), LineFileWriteOperatorDescriptor (org.apache.hyracks.dataflow.std.file.LineFileWriteOperatorDescriptor), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), File (java.io.File), Test (org.junit.Test)

Aggregations

RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 169
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 90
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 74
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 72
Test (org.junit.Test): 69
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 64
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 55
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 53
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 52
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 41
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 40
FileSplit (org.apache.hyracks.api.io.FileSplit): 38
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 37
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer): 36
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 35
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 35
ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator): 33
IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory): 31
MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor): 27
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory): 25
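
The construction pattern that recurs through all of these examples is small enough to show on its own. The following is a minimal sketch written for this summary rather than taken from the asterixdb sources (the class RecordDescriptorSketch is made up for illustration): a RecordDescriptor is built from one ISerializerDeserializer per field, here an integer followed by a UTF-8 string, and is then handed to whichever operator or runtime produces or consumes tuples of that shape.

import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

// Illustrative helper only; this class does not exist in asterixdb.
public class RecordDescriptorSketch {

    // Describes tuples with two fields: an int followed by a UTF-8 string.
    public static RecordDescriptor intStringDescriptor() {
        ISerializerDeserializer[] fields = new ISerializerDeserializer[] {
                IntegerSerializerDeserializer.INSTANCE,
                new UTF8StringSerializerDeserializer() };
        return new RecordDescriptor(fields);
    }
}

As in the examples above, the same descriptor instance is typically shared between a producing operator and its downstream consumer when the schema does not change, for example the scanner descriptor being reused as the select output descriptor in scanSelectWrite.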