Search in sources :

Example 1 with SplitOperatorDescriptor

use of org.apache.hyracks.algebricks.runtime.operators.std.SplitOperatorDescriptor in project asterixdb by apache.

the class PushRuntimeTest method scanSplitWrite.

@Test
public void scanSplitWrite() throws Exception {
    final int outputArity = 2;
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    String[] inputFileName = { "data" + File.separator + "simple" + File.separator + "int-string-part1.tbl", "data" + File.separator + "simple" + File.separator + "int-string-part1-split-0.tbl", "data" + File.separator + "simple" + File.separator + "int-string-part1-split-1.tbl" };
    File[] inputFiles = new File[inputFileName.length];
    for (int i = 0; i < inputFileName.length; i++) {
        inputFiles[i] = new File(inputFileName[i]);
    }
    File[] outputFile = new File[outputArity];
    FileSplit[] outputFileSplit = new FileSplit[outputArity];
    for (int i = 0; i < outputArity; i++) {
        outputFileSplit[i] = createFile(AlgebricksHyracksIntegrationUtil.nc1);
        outputFile[i] = outputFileSplit[i].getFile(AlgebricksHyracksIntegrationUtil.nc1.getIoManager());
    }
    FileSplit[] inputSplits = new FileSplit[] { new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, inputFileName[0]) };
    IFileSplitProvider intSplitProvider = new ConstantFileSplitProvider(inputSplits);
    RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
    FileScanOperatorDescriptor intScanner = new FileScanOperatorDescriptor(spec, intSplitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, intScanner, DEFAULT_NODES);
    SplitOperatorDescriptor splitOp = new SplitOperatorDescriptor(spec, scannerDesc, outputArity, new TupleFieldEvaluatorFactory(0), BinaryIntegerInspectorImpl.FACTORY);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, splitOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    IOperatorDescriptor[] outputOp = new IOperatorDescriptor[outputFile.length];
    for (int i = 0; i < outputArity; i++) {
        outputOp[i] = new LineFileWriteOperatorDescriptor(spec, new FileSplit[] { outputFileSplit[i] });
        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, outputOp[i], new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    }
    spec.connect(new OneToOneConnectorDescriptor(spec), intScanner, 0, splitOp, 0);
    for (int i = 0; i < outputArity; i++) {
        spec.connect(new OneToOneConnectorDescriptor(spec), splitOp, i, outputOp[i], 0);
    }
    for (int i = 0; i < outputArity; i++) {
        spec.addRoot(outputOp[i]);
    }
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    for (int i = 0; i < outputArity; i++) {
        compareFiles("data" + File.separator + "device0" + File.separator + inputFileName[i + 1], outputFile[i].getAbsolutePath());
    }
}
Also used : TupleFieldEvaluatorFactory(org.apache.hyracks.algebricks.runtime.evaluators.TupleFieldEvaluatorFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) SplitOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.std.SplitOperatorDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) LineFileWriteOperatorDescriptor(org.apache.hyracks.dataflow.std.file.LineFileWriteOperatorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) File(java.io.File) Test(org.junit.Test)

Example 2 with SplitOperatorDescriptor

use of org.apache.hyracks.algebricks.runtime.operators.std.SplitOperatorDescriptor in project asterixdb by apache.

the class SplitPOperator method contributeRuntimeOperator.

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    SplitOperator sop = (SplitOperator) op;
    int outputArity = sop.getOutputArity();
    int defaultBranch = sop.getDefaultBranch();
    boolean propageToAllBranchAsDefault = sop.getPropageToAllBranchAsDefault();
    IOperatorDescriptorRegistry spec = builder.getJobSpec();
    RecordDescriptor recDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), propagatedSchema, context);
    IExpressionRuntimeProvider expressionRuntimeProvider = context.getExpressionRuntimeProvider();
    IScalarEvaluatorFactory brachingExprEvalFactory = expressionRuntimeProvider.createEvaluatorFactory(sop.getBranchingExpression().getValue(), context.getTypeEnvironment(op), inputSchemas, context);
    IBinaryIntegerInspectorFactory intInsepctorFactory = context.getBinaryIntegerInspectorFactory();
    SplitOperatorDescriptor splitOpDesc = new SplitOperatorDescriptor(spec, recDescriptor, outputArity, brachingExprEvalFactory, intInsepctorFactory, defaultBranch, propageToAllBranchAsDefault);
    contributeOpDesc(builder, (AbstractLogicalOperator) op, splitOpDesc);
    ILogicalOperator src = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, op, 0);
}
Also used : IExpressionRuntimeProvider(org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider) SplitOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.std.SplitOperatorDescriptor) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) SplitOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.SplitOperator) IOperatorDescriptorRegistry(org.apache.hyracks.api.job.IOperatorDescriptorRegistry) IBinaryIntegerInspectorFactory(org.apache.hyracks.algebricks.data.IBinaryIntegerInspectorFactory) IScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory)

Aggregations

SplitOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.std.SplitOperatorDescriptor)2 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)2 File (java.io.File)1 ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator)1 IExpressionRuntimeProvider (org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider)1 SplitOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.SplitOperator)1 IBinaryIntegerInspectorFactory (org.apache.hyracks.algebricks.data.IBinaryIntegerInspectorFactory)1 IScalarEvaluatorFactory (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory)1 TupleFieldEvaluatorFactory (org.apache.hyracks.algebricks.runtime.evaluators.TupleFieldEvaluatorFactory)1 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)1 FileSplit (org.apache.hyracks.api.io.FileSplit)1 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)1 IOperatorDescriptorRegistry (org.apache.hyracks.api.job.IOperatorDescriptorRegistry)1 JobSpecification (org.apache.hyracks.api.job.JobSpecification)1 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)1 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)1 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)1 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)1 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)1 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)1