Search in sources :

Example 1 with ITupleWriter

use of org.apache.hyracks.hdfs.api.ITupleWriter in project asterixdb by apache.

the class HDFSWriteOperatorDescriptor method createPushRuntime.

/**
 * Creates the sink runtime for one partition. The runtime writes every tuple of every
 * incoming frame, via an {@link ITupleWriter}, to a file named {@code part-<partition>}
 * under the output path configured in the job configuration.
 *
 * @param ctx task context, handed to the tuple writer factory
 * @param recordDescProvider source of the record descriptor for input 0
 * @param partition index of this partition; also used in the output file name
 * @param nPartitions total partition count, handed to the tuple writer factory
 * @return the push runtime that consumes frames and writes them out
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
    return new AbstractUnaryInputSinkOperatorNodePushable() {

        // Output stream of the partition file; stays null until open() has created it.
        private FSDataOutputStream dos;

        private final RecordDescriptor inputRd = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);

        private final FrameTupleAccessor accessor = new FrameTupleAccessor(inputRd);

        private final FrameTupleReference tuple = new FrameTupleReference();

        private ITupleWriter tupleWriter;

        // Context class loader saved in open() and restored in close().
        private ClassLoader ctxCL;

        /**
         * Creates (overwriting if present) the partition's output file and opens the
         * tuple writer on it.
         */
        @Override
        public void open() throws HyracksDataException {
            // Run with this class's loader as the thread context loader; close() puts the old one back.
            ctxCL = Thread.currentThread().getContextClassLoader();
            Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
            JobConf conf = confFactory.getConf();
            String outputDirPath = FileOutputFormat.getOutputPath(conf).toString();
            // Hadoop paths use '/' regardless of the local OS, so File.separator is the wrong separator here.
            String fileName = outputDirPath + Path.SEPARATOR + "part-" + partition;
            tupleWriter = tupleWriterFactory.getTupleWriter(ctx, partition, nPartitions);
            try {
                FileSystem dfs = FileSystem.get(conf);
                dos = dfs.create(new Path(fileName), true);
                tupleWriter.open(dos);
            } catch (Exception e) {
                throw new HyracksDataException(e);
            }
        }

        /**
         * Writes each tuple contained in the frame to the output stream.
         */
        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            accessor.reset(buffer);
            int tupleCount = accessor.getTupleCount();
            for (int i = 0; i < tupleCount; i++) {
                tuple.reset(accessor, i);
                tupleWriter.write(dos, tuple);
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            // Nothing to do here; the stream and class loader are released in close().
        }

        /**
         * Closes the tuple writer and the output stream, then restores the saved
         * context class loader.
         */
        @Override
        public void close() throws HyracksDataException {
            try {
                // dos is null when open() failed before the file was created; nothing to release then.
                if (dos != null) {
                    try {
                        tupleWriter.close(dos);
                    } finally {
                        // Close the stream even if the tuple writer's close failed.
                        dos.close();
                    }
                }
            } catch (Exception e) {
                throw new HyracksDataException(e);
            } finally {
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ITupleWriter(org.apache.hyracks.hdfs.api.ITupleWriter) FrameTupleReference(org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference) ByteBuffer(java.nio.ByteBuffer) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) HyracksException(org.apache.hyracks.api.exceptions.HyracksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) AbstractUnaryInputSinkOperatorNodePushable(org.apache.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) JobConf(org.apache.hadoop.mapred.JobConf) FrameTupleAccessor(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor)

Example 2 with ITupleWriter

use of org.apache.hyracks.hdfs.api.ITupleWriter in project asterixdb by apache.

the class HDFSWriteOperatorDescriptor method createPushRuntime.

/**
 * Creates the sink runtime for one partition. The runtime writes every tuple of every
 * incoming frame, via an {@link ITupleWriter}, to a file named {@code part-<partition>}
 * under the output path configured in the job.
 *
 * @param ctx task context, handed to the tuple writer factory
 * @param recordDescProvider source of the record descriptor for input 0
 * @param partition index of this partition; also used in the output file name
 * @param nPartitions total partition count, handed to the tuple writer factory
 * @return the push runtime that consumes frames and writes them out
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
    return new AbstractUnaryInputSinkOperatorNodePushable() {

        // Output stream of the partition file; stays null until open() has created it.
        private FSDataOutputStream dos;

        private final RecordDescriptor inputRd = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);

        private final FrameTupleAccessor accessor = new FrameTupleAccessor(inputRd);

        private final FrameTupleReference tuple = new FrameTupleReference();

        private ITupleWriter tupleWriter;

        // Context class loader saved in open() and restored in close().
        private ClassLoader ctxCL;

        /**
         * Creates (overwriting if present) the partition's output file and opens the
         * tuple writer on it.
         */
        @Override
        public void open() throws HyracksDataException {
            // Run with this class's loader as the thread context loader; close() puts the old one back.
            ctxCL = Thread.currentThread().getContextClassLoader();
            Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
            Job conf = confFactory.getConf();
            String outputPath = FileOutputFormat.getOutputPath(conf).toString();
            // Hadoop paths use '/' regardless of the local OS, so File.separator is the wrong separator here.
            String fileName = outputPath + Path.SEPARATOR + "part-" + partition;
            tupleWriter = tupleWriterFactory.getTupleWriter(ctx, partition, nPartitions);
            try {
                FileSystem dfs = FileSystem.get(conf.getConfiguration());
                dos = dfs.create(new Path(fileName), true);
                tupleWriter.open(dos);
            } catch (Exception e) {
                throw new HyracksDataException(e);
            }
        }

        /**
         * Writes each tuple contained in the frame to the output stream.
         */
        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            accessor.reset(buffer);
            int tupleCount = accessor.getTupleCount();
            for (int i = 0; i < tupleCount; i++) {
                tuple.reset(accessor, i);
                tupleWriter.write(dos, tuple);
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            // Nothing to do here; the stream and class loader are released in close().
        }

        /**
         * Closes the tuple writer and the output stream, then restores the saved
         * context class loader.
         */
        @Override
        public void close() throws HyracksDataException {
            try {
                // dos is null when open() failed before the file was created; nothing to release then.
                if (dos != null) {
                    try {
                        tupleWriter.close(dos);
                    } finally {
                        // Close the stream even if the tuple writer's close failed.
                        dos.close();
                    }
                }
            } catch (Exception e) {
                throw new HyracksDataException(e);
            } finally {
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ITupleWriter(org.apache.hyracks.hdfs.api.ITupleWriter) FrameTupleReference(org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference) ByteBuffer(java.nio.ByteBuffer) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) HyracksException(org.apache.hyracks.api.exceptions.HyracksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) AbstractUnaryInputSinkOperatorNodePushable(org.apache.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Job(org.apache.hadoop.mapreduce.Job) FrameTupleAccessor(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor)

Example 3 with ITupleWriter

use of org.apache.hyracks.hdfs.api.ITupleWriter in project asterixdb by apache.

the class TextTupleWriterFactory method getTupleWriter.

/**
 * Creates a tuple writer that emits the bytes of field 0 of each tuple followed by a
 * single newline byte. Its {@code open} and {@code close} methods do nothing.
 *
 * @param ctx task context (not used by this writer)
 * @param partition partition index (not used by this writer)
 * @param nPartition partition count (not used by this writer)
 * @return a new line-oriented tuple writer
 */
@Override
public ITupleWriter getTupleWriter(IHyracksTaskContext ctx, int partition, int nPartition) {
    return new ITupleWriter() {

        // Taken from the char constant rather than "\n".getBytes(), which would go through
        // the platform default charset.
        private final byte newLine = (byte) '\n';

        @Override
        public void open(DataOutput output) {
            // No header or per-stream setup is written.
        }

        /**
         * Writes field 0 of the tuple and then a newline byte to the output.
         *
         * @throws HyracksDataException wrapping any exception raised while writing
         */
        @Override
        public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
            byte[] data = tuple.getFieldData(0);
            int start = tuple.getFieldStart(0);
            int len = tuple.getFieldLength(0);
            try {
                output.write(data, start, len);
                output.writeByte(newLine);
            } catch (Exception e) {
                throw new HyracksDataException(e);
            }
        }

        @Override
        public void close(DataOutput output) {
            // Nothing to flush or release here.
        }
    };
}
Also used : DataOutput(java.io.DataOutput) ITupleWriter(org.apache.hyracks.hdfs.api.ITupleWriter) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Aggregations

HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException) 3, ITupleWriter (org.apache.hyracks.hdfs.api.ITupleWriter) 3, ByteBuffer (java.nio.ByteBuffer) 2, FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 2, FileSystem (org.apache.hadoop.fs.FileSystem) 2, Path (org.apache.hadoop.fs.Path) 2, RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor) 2, HyracksException (org.apache.hyracks.api.exceptions.HyracksException) 2, FrameTupleAccessor (org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor) 2, FrameTupleReference (org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference) 2, AbstractUnaryInputSinkOperatorNodePushable (org.apache.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable) 2, DataOutput (java.io.DataOutput) 1, JobConf (org.apache.hadoop.mapred.JobConf) 1, Job (org.apache.hadoop.mapreduce.Job) 1, ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) 1