Example 86 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

From the class UnionTest, the method createUnionJobSpec builds a job that scans two delimited word files, merges both streams through a UnionAllOperatorDescriptor, and writes the combined output to a result set:

public static JobSpecification createUnionJobSpec() throws Exception {
    JobSpecification spec = new JobSpecification();
    // Two copies of the words file, one managed split per node controller.
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(new FileSplit[] {
            new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt"),
            new ManagedFileSplit(NC1_ID, "data" + File.separator + "nc1" + File.separator + "words.txt") });
    // Each tuple carries a single UTF-8 string field.
    RecordDescriptor desc = new RecordDescriptor(
            new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor csvScanner01 = new FileScanOperatorDescriptor(spec, splitProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','),
            desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner01, NC2_ID, NC1_ID);
    FileScanOperatorDescriptor csvScanner02 = new FileScanOperatorDescriptor(spec, splitProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','),
            desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner02, NC2_ID, NC1_ID);
    // Merge both scan outputs into a single stream; both inputs share the same schema.
    UnionAllOperatorDescriptor unionAll = new UnionAllOperatorDescriptor(spec, 2, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, unionAll, NC2_ID, NC1_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    // Write the union to the result set (ordered = false, asyncMode = false).
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), csvScanner01, 0, unionAll, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), csvScanner02, 0, unionAll, 1);
    spec.connect(new OneToOneConnectorDescriptor(spec), unionAll, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}
Also used:
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)
ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor)
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)
ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)
JobSpecification (org.apache.hyracks.api.job.JobSpecification)
UnionAllOperatorDescriptor (org.apache.hyracks.dataflow.std.union.UnionAllOperatorDescriptor)
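
For context, a job specification like this one is submitted to the Hyracks cluster controller through an IHyracksClientConnection. A minimal sketch, assuming a cluster controller reachable on localhost (the endpoint and the main-class wrapper are illustrative, not part of the example above):

import org.apache.hyracks.api.client.HyracksConnection;
import org.apache.hyracks.api.client.IHyracksClientConnection;
import org.apache.hyracks.api.job.JobId;
import org.apache.hyracks.api.job.JobSpecification;

public class UnionJobRunner {
    public static void main(String[] args) throws Exception {
        // Assumed endpoint; in the tests the connection comes from the harness.
        IHyracksClientConnection hcc = new HyracksConnection("localhost", 1098);
        JobSpecification spec = UnionTest.createUnionJobSpec();
        JobId jobId = hcc.startJob(spec); // submit the job for execution
        hcc.waitForCompletion(jobId);     // block until the job finishes
    }
}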

Example 87 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

From the class NestedPlansRunningAggregatorFactory, the method assemblePipeline wires a subplan's push runtimes together back to front, so that the head of the pipeline ultimately feeds the supplied writer:

private IFrameWriter assemblePipeline(AlgebricksPipeline subplan, IFrameWriter writer, IHyracksTaskContext ctx) throws HyracksDataException {
    // Wire the runtimes back to front: each runtime pushes its frames into
    // the runtime (or final writer) built in the previous iteration.
    IFrameWriter start = writer;
    IPushRuntimeFactory[] runtimeFactories = subplan.getRuntimeFactories();
    RecordDescriptor[] recordDescriptors = subplan.getRecordDescriptors();
    for (int i = runtimeFactories.length - 1; i >= 0; i--) {
        IPushRuntime newRuntime = runtimeFactories[i].createPushRuntime(ctx);
        newRuntime.setFrameWriter(0, start, recordDescriptors[i]);
        if (i > 0) {
            newRuntime.setInputRecordDescriptor(0, recordDescriptors[i - 1]);
        } else {
            // The nested tuple source (nts) at the head of the pipeline has
            // the same input and output record descriptor.
            newRuntime.setInputRecordDescriptor(0, recordDescriptors[0]);
        }
        start = newRuntime;
    }
    return start;
}
Also used:
IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter)
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)
IPushRuntime (org.apache.hyracks.algebricks.runtime.base.IPushRuntime)
IPushRuntimeFactory (org.apache.hyracks.algebricks.runtime.base.IPushRuntimeFactory)
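
To make the back-to-front wiring concrete, here is a hedged sketch of what the loop produces for a subplan with exactly two runtime factories (the variable names r0 and r1 are illustrative; only calls that appear in the method above are used):

// Iteration i = 1 builds the tail of the pipeline first.
IPushRuntime r1 = runtimeFactories[1].createPushRuntime(ctx);
r1.setFrameWriter(0, writer, recordDescriptors[1]);   // r1 pushes to the final writer
r1.setInputRecordDescriptor(0, recordDescriptors[0]); // r1 consumes r0's output format
// Iteration i = 0 builds the head, which feeds r1.
IPushRuntime r0 = runtimeFactories[0].createPushRuntime(ctx);
r0.setFrameWriter(0, r1, recordDescriptors[0]);
r0.setInputRecordDescriptor(0, recordDescriptors[0]); // nts: input == output descriptor
// Data flows r0 -> r1 -> writer; r0 is returned as the pipeline's entry point.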

Example 88 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

From the class ResultWriterOperatorDescriptor, the method createPushRuntime builds a sink that serializes incoming tuples into frames and hands them to a dataset partition writer:

@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
    final IDatasetPartitionManager dpm = ctx.getDatasetPartitionManager();
    final IFrame frame = new VSizeFrame(ctx);
    final FrameOutputStream frameOutputStream = new FrameOutputStream(ctx.getInitialFrameSize());
    frameOutputStream.reset(frame, true);
    PrintStream printStream = new PrintStream(frameOutputStream);
    final RecordDescriptor outRecordDesc = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);
    final IResultSerializer resultSerializer = resultSerializerFactory.createResultSerializer(outRecordDesc, printStream);
    final FrameTupleAccessor frameTupleAccessor = new FrameTupleAccessor(outRecordDesc);
    return new AbstractUnaryInputSinkOperatorNodePushable() {

        private IFrameWriter datasetPartitionWriter;

        private boolean failed = false;

        @Override
        public void open() throws HyracksDataException {
            try {
                datasetPartitionWriter = dpm.createDatasetPartitionWriter(ctx, rsId, ordered, asyncMode, partition, nPartitions);
                datasetPartitionWriter.open();
                resultSerializer.init();
            } catch (HyracksException e) {
                throw HyracksDataException.create(e);
            }
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            frameTupleAccessor.reset(buffer);
            for (int tIndex = 0; tIndex < frameTupleAccessor.getTupleCount(); tIndex++) {
                // Serialize the tuple, then try to commit it to the current frame.
                resultSerializer.appendTuple(frameTupleAccessor, tIndex);
                if (!frameOutputStream.appendTuple()) {
                    // The frame is full: flush it downstream, then re-serialize
                    // and append the tuple to the fresh frame.
                    frameOutputStream.flush(datasetPartitionWriter);
                    resultSerializer.appendTuple(frameTupleAccessor, tIndex);
                    frameOutputStream.appendTuple();
                }
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            failed = true;
            datasetPartitionWriter.fail();
        }

        @Override
        public void close() throws HyracksDataException {
            try {
                // Flush any tuples still buffered in the current frame, unless the job failed.
                if (!failed && frameOutputStream.getTupleCount() > 0) {
                    frameOutputStream.flush(datasetPartitionWriter);
                }
            } catch (Exception e) {
                datasetPartitionWriter.fail();
                throw e;
            } finally {
                datasetPartitionWriter.close();
            }
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("{ ");
            sb.append("\"rsId\": \"").append(rsId).append("\", ");
            sb.append("\"ordered\": ").append(ordered).append(", ");
            sb.append("\"asyncMode\": ").append(asyncMode).append(" }");
            return sb.toString();
        }
    };
}
Also used:
PrintStream (java.io.PrintStream)
IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter)
IFrame (org.apache.hyracks.api.comm.IFrame)
IResultSerializer (org.apache.hyracks.api.dataflow.value.IResultSerializer)
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)
HyracksException (org.apache.hyracks.api.exceptions.HyracksException)
ByteBuffer (java.nio.ByteBuffer)
VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame)
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)
IOException (java.io.IOException)
IDatasetPartitionManager (org.apache.hyracks.api.dataset.IDatasetPartitionManager)
AbstractUnaryInputSinkOperatorNodePushable (org.apache.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable)
FrameOutputStream (org.apache.hyracks.dataflow.common.comm.io.FrameOutputStream)
FrameTupleAccessor (org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor)
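
The double call to appendTuple in nextFrame is the usual frame-overflow idiom: serialize, try to commit to the current frame, and on failure flush and try once more. Factored into a hedged helper (the method name appendOrFlush is illustrative, not part of the class above):

// Sketch of the flush-and-retry idiom from nextFrame().
private static void appendOrFlush(IResultSerializer serializer, FrameTupleAccessor accessor,
        int tIndex, FrameOutputStream out, IFrameWriter writer) throws HyracksDataException {
    serializer.appendTuple(accessor, tIndex); // render the tuple into the stream's buffer
    if (!out.appendTuple()) {                 // the current frame is full
        out.flush(writer);                    // ship the full frame downstream
        serializer.appendTuple(accessor, tIndex);
        out.appendTuple();                    // the tuple fits in the now-empty frame
    }
}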

Example 89 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

From the class FileRemoveOperatorDescriptor, the method createPushRuntime returns a zero-input pushable whose initialize() deletes the file split assigned to this partition:

@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    final FileSplit split = fileSplitProvider.getFileSplits()[partition];
    final IIOManager ioManager = ctx.getIoManager();
    return new AbstractOperatorNodePushable() {

        @Override
        public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc) {
            throw new IllegalStateException();
        }

        @Override
        public void initialize() throws HyracksDataException {
            // Resolving the split to a File only works for files inside the node's IO devices.
            File f = split.getFile(ioManager);
            if (quietly) {
                // Best-effort delete: failures and missing files are ignored.
                FileUtils.deleteQuietly(f);
            } else {
                try {
                    FileUtils.deleteDirectory(f);
                } catch (IOException e) {
                    throw new HyracksDataException(e);
                }
            }
        }

        @Override
        public IFrameWriter getInputFrameWriter(int index) {
            throw new IllegalStateException();
        }

        @Override
        public int getInputArity() {
            return 0;
        }

        @Override
        public void deinitialize() throws HyracksDataException {
        }
    };
}
Also used:
IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter)
AbstractOperatorNodePushable (org.apache.hyracks.dataflow.std.base.AbstractOperatorNodePushable)
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)
IOException (java.io.IOException)
FileSplit (org.apache.hyracks.api.io.FileSplit)
IIOManager (org.apache.hyracks.api.io.IIOManager)
File (java.io.File)
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)
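
For context, a hedged sketch of dropping this operator into a job specification. The three-argument constructor (spec, split provider, quietly flag) is inferred from the fields the snippet uses, and NC1_ID stands in for a node controller id from the test harness:

// A job whose only root deletes one file split per partition.
JobSpecification spec = new JobSpecification();
IFileSplitProvider splits = new ConstantFileSplitProvider(new FileSplit[] {
        new ManagedFileSplit(NC1_ID, "tmp" + File.separator + "scratch") });
FileRemoveOperatorDescriptor remover = new FileRemoveOperatorDescriptor(spec, splits, true); // true: delete quietly
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, remover, NC1_ID);
spec.addRoot(remover);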

Example 90 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

From the class FlushDatasetUtil, the method flushDataset submits a minimal job whose sole purpose is to trigger a flush of the given dataset:

public static void flushDataset(IHyracksClientConnection hcc, MetadataProvider metadataProvider, String dataverseName, String datasetName, String indexName) throws Exception {
    CompilerProperties compilerProperties = metadataProvider.getApplicationContext().getCompilerProperties();
    int frameSize = compilerProperties.getFrameSize();
    JobSpecification spec = new JobSpecification(frameSize);
    // An empty tuple source producing zero-field tuples: the job exists only
    // to drive the flush operator, not to move data.
    RecordDescriptor[] rDescs = new RecordDescriptor[] { new RecordDescriptor(new ISerializerDeserializer[] {}) };
    AlgebricksMetaOperatorDescriptor emptySource = new AlgebricksMetaOperatorDescriptor(spec, 0, 1,
            new IPushRuntimeFactory[] { new EmptyTupleSourceRuntimeFactory() }, rDescs);
    org.apache.asterix.common.transactions.JobId jobId = JobIdFactory.generateJobId();
    Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
    FlushDatasetOperatorDescriptor flushOperator = new FlushDatasetOperatorDescriptor(spec, jobId, dataset.getDatasetId());
    spec.connect(new OneToOneConnectorDescriptor(spec), emptySource, 0, flushOperator, 0);
    // Run the source on the same partitions that hold the target index.
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
    AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, emptySource, primaryPartitionConstraint);
    JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(jobId, true);
    spec.setJobletEventListenerFactory(jobEventListenerFactory);
    JobUtils.runJob(hcc, spec, true);
}
Also used:
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)
Dataset (org.apache.asterix.metadata.entities.Dataset)
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)
AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor)
CompilerProperties (org.apache.asterix.common.config.CompilerProperties)
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)
JobEventListenerFactory (org.apache.asterix.runtime.job.listener.JobEventListenerFactory)
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)
FlushDatasetOperatorDescriptor (org.apache.asterix.runtime.operators.std.FlushDatasetOperatorDescriptor)
EmptyTupleSourceRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.EmptyTupleSourceRuntimeFactory)
JobSpecification (org.apache.hyracks.api.job.JobSpecification)
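
Invoking the utility is a one-liner. A hedged sketch, assuming hcc and metadataProvider come from the surrounding runtime context and using illustrative names (in AsterixDB the primary index conventionally shares the dataset's name):

// Flush the primary index of dataset Customers in dataverse Sales.
FlushDatasetUtil.flushDataset(hcc, metadataProvider, "Sales", "Customers", "Customers");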

Aggregations

RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 169
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 90
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 74
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 72
Test (org.junit.Test): 69
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 64
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 55
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 53
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 52
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 41
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 40
FileSplit (org.apache.hyracks.api.io.FileSplit): 38
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 37
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer): 36
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 35
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 35
ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator): 33
IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory): 31
MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor): 27
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory): 25