Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in the Apache Hive project: the getHiveRecordWriter method of the HiveBinaryOutputFormat class.
/**
 * Create the final output file and return a writer that emits each row as
 * raw bytes, with no key, no separator, and no framing between rows.
 *
 * @param jc
 *          the job configuration file
 * @param outPath
 *          the final output file to be created
 * @param valueClass
 *          the value class used for create
 * @param isCompressed
 *          ignored. Currently we don't support compression.
 * @param tableProperties
 *          the tableProperties of this file's corresponding table
 * @param progress
 *          progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
  FileSystem fs = outPath.getFileSystem(jc);
  final OutputStream outStream = fs.create(outPath, progress);
  return new RecordWriter() {
    @Override
    public void write(Writable r) throws IOException {
      if (r instanceof Text) {
        Text tr = (Text) r;
        // Text.getBytes() returns the backing array, which may be longer
        // than the logical content — always bound by getLength().
        outStream.write(tr.getBytes(), 0, tr.getLength());
      } else {
        // Binary SerDes always write out BytesWritable
        BytesWritable bw = (BytesWritable) r;
        // getBytes()/getLength() replace the deprecated get()/getSize()
        // accessors; same underlying array and logical length.
        outStream.write(bw.getBytes(), 0, bw.getLength());
      }
    }

    @Override
    public void close(boolean abort) throws IOException {
      outStream.close();
    }
  };
}
Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in the Apache Hive project: the getHiveRecordWriter method of the HiveSequenceFileOutputFormat class.
/**
 * Create the final output file as a SequenceFile and return a writer that
 * appends every record under the shared empty key.
 *
 * @param jc
 *          the job configuration file
 * @param finalOutPath
 *          the final output file to be created
 * @param valueClass
 *          the value class used for create
 * @param isCompressed
 *          whether the content is compressed or not
 * @param tableProperties
 *          the tableInfo of this file's corresponding table
 * @param progress
 *          progress used for status report
 * @return the RecordWriter for the output file
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
  final FileSystem fileSystem = finalOutPath.getFileSystem(jc);
  final SequenceFile.Writer sequenceWriter =
      Utilities.createSequenceWriter(jc, fileSystem, finalOutPath, BytesWritable.class, valueClass, isCompressed, progress);

  return new RecordWriter() {
    @Override
    public void write(Writable record) throws IOException {
      // Keys carry no information in this format; every record shares EMPTY_KEY.
      sequenceWriter.append(EMPTY_KEY, record);
    }

    @Override
    public void close(boolean abort) throws IOException {
      sequenceWriter.close();
    }
  };
}
Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in the Apache Hive project: the getHiveRecordWriter method of the TeradataBinaryFileOutputFormat class.
/**
 * Create the final out file, and output row by row. After one row is
 * appended, a configured row separator is appended
 *
 * @param jc
 *          the job configuration file
 * @param outPath
 *          the final output file to be created
 * @param valueClass
 *          the value class used for create
 * @param isCompressed
 *          whether the content is compressed or not
 * @param tableProperties
 *          the tableProperties of this file's corresponding table
 * @param progress
 *          progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
  FileSystem fs = outPath.getFileSystem(jc);
  final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath, progress), isCompressed);
  // The row-length table property is constant for the whole file, so parse,
  // log and validate it once here instead of on every write() call.
  // NOTE(review): this also makes an invalid setting fail at writer creation
  // instead of on the first record — same IllegalArgumentException, fail-fast.
  final String rowLength = tableProperties.getProperty(TeradataBinaryRecordReader.TD_ROW_LENGTH, TeradataBinaryRecordReader.DEFAULT_TD_ROW_LENGTH).toLowerCase();
  LOG.debug(format("The table property %s is: %s", TeradataBinaryRecordReader.TD_ROW_LENGTH, rowLength));
  if (!TeradataBinaryRecordReader.TD_ROW_LENGTH_TO_BYTE_NUM.containsKey(rowLength)) {
    throw new IllegalArgumentException(format("%s doesn't support the value %s, the supported values are %s", TeradataBinaryRecordReader.TD_ROW_LENGTH, rowLength, TeradataBinaryRecordReader.TD_ROW_LENGTH_TO_BYTE_NUM.keySet()));
  }
  // Pre-resolve which length prefix to write; a key that is in the map but
  // matches neither constant writes no prefix, exactly as before.
  final boolean shortPrefix = rowLength.equals(TeradataBinaryRecordReader.DEFAULT_TD_ROW_LENGTH);
  final boolean intPrefix = rowLength.equals(TeradataBinaryRecordReader.TD_ROW_LENGTH_1MB);
  return new RecordWriter() {
    @Override
    public void write(Writable r) throws IOException {
      BytesWritable bw = (BytesWritable) r;
      int recordLength = bw.getLength();
      if (shortPrefix) {
        // write the length using little endian
        EndianUtils.writeSwappedShort(outStream, (short) recordLength);
      } else if (intPrefix) {
        // write the length using little endian
        EndianUtils.writeSwappedInteger(outStream, recordLength);
      }
      // write the content (the content is in little endian)
      outStream.write(bw.getBytes(), 0, bw.getLength());
      // write the record ending
      outStream.write(RECORD_END_BYTE);
    }

    @Override
    public void close(boolean abort) throws IOException {
      outStream.close();
    }
  };
}
Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in the Apache Hive project: the getHiveRecordWriter method of the Rot13OutputFormat class.
/**
 * Wrap the parent format's writer so that every Text or BytesWritable row
 * is ROT13-transformed in place before being written out.
 *
 * @param jc the job configuration
 * @param outPath the final output file to be created
 * @param valueClass the value class used for create
 * @param isCompressed whether the content is compressed or not
 * @param tableProperties the table properties of this file's table
 * @param progress progress used for status report
 * @return a RecordWriter that ROT13s each record before delegating
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
  final RecordWriter result = super.getHiveRecordWriter(jc, outPath, valueClass, isCompressed, tableProperties, progress);
  // progress is only declared as Progressable; the previous unconditional
  // downcast threw ClassCastException for non-Reporter implementations.
  // Guard it so the status update is best-effort.
  if (progress instanceof Reporter) {
    Reporter reporter = (Reporter) progress;
    reporter.setStatus("got here");
    System.out.println("Got a reporter " + reporter);
  }
  return new RecordWriter() {
    @Override
    public void write(Writable w) throws IOException {
      if (w instanceof Text) {
        Text value = (Text) w;
        // rot13 mutates the backing array in place; delegate the same object.
        Rot13InputFormat.rot13(value.getBytes(), 0, value.getLength());
        result.write(w);
      } else if (w instanceof BytesWritable) {
        BytesWritable value = (BytesWritable) w;
        Rot13InputFormat.rot13(value.getBytes(), 0, value.getLength());
        result.write(w);
      } else {
        throw new IllegalArgumentException("need text or bytes writable " + " instead of " + w.getClass().getName());
      }
    }

    @Override
    public void close(boolean abort) throws IOException {
      result.close(abort);
    }
  };
}
Aggregations