
Example 11 with RecordWriter

Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project hive by apache.

From the class Utilities, method createEmptyFile.

@SuppressWarnings({ "rawtypes", "unchecked" })
private static Path createEmptyFile(Path hiveScratchDir, HiveOutputFormat outFileFormat, JobConf job, Properties props, boolean dummyRow) throws IOException, InstantiationException, IllegalAccessException {
    // create a dummy empty file in a new directory
    String newDir = hiveScratchDir + Path.SEPARATOR + UUID.randomUUID().toString();
    Path newPath = new Path(newDir);
    FileSystem fs = newPath.getFileSystem(job);
    fs.mkdirs(newPath);
    // Qualify the path against the file system. The user-configured path might contain a default port that is
    // dropped from the file status. This makes sure that all paths that go into PathToPartitionInfo match the
    // listed file-status paths.
    newPath = fs.makeQualified(newPath);
    String newFile = newDir + Path.SEPARATOR + "emptyFile";
    Path newFilePath = new Path(newFile);
    RecordWriter recWriter = outFileFormat.getHiveRecordWriter(job, newFilePath, Text.class, false, props, null);
    if (dummyRow) {
        // Empty files are omitted by CombineHiveInputFormat. For a metadata-only query this
        // effectively makes partition columns disappear; that could be fixed in other ways,
        // but this seemed the easiest (HIVE-2955).
        // Written via HiveIgnoreKeyTextOutputFormat.
        recWriter.write(new Text("empty"));
    }
    recWriter.close(false);
    return StringInternUtils.internUriStringsInPath(newPath);
}
Also used : Path(org.apache.hadoop.fs.Path) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) FileSystem(org.apache.hadoop.fs.FileSystem) Text(org.apache.hadoop.io.Text)
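As a point of reference, here is a minimal standalone sketch of the same pattern: obtain a RecordWriter from a HiveOutputFormat, write one dummy row, and close it. The scratch path and the choice of HiveIgnoreKeyTextOutputFormat are assumptions for illustration, not code from Utilities.

import java.util.Properties;
import java.util.UUID;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class EmptyFileSketch {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf();
        Properties props = new Properties();
        // Hypothetical scratch location; createEmptyFile derives this from hiveScratchDir.
        Path target = new Path("/tmp/scratch/" + UUID.randomUUID() + "/emptyFile");
        RecordWriter writer = new HiveIgnoreKeyTextOutputFormat<>()
                .getHiveRecordWriter(job, target, Text.class, false, props, null);
        // One dummy row keeps CombineHiveInputFormat from dropping the file (HIVE-2955).
        writer.write(new Text("empty"));
        // close(false) means a normal close, not an abort.
        writer.close(false);
    }
}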

Example 12 with RecordWriter

Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project hive by apache.

From the class ColumnarStorageBench, method prepareBenchmark.

/**
 * Initializes resources that will be needed for each of the benchmark tests.
 *
 * @throws SerDeException If it cannot initialize the desired test format.
 * @throws IOException If it cannot write data to temporary files.
 */
@Setup(Level.Trial)
public void prepareBenchmark() throws SerDeException, IOException {
    if (format.equalsIgnoreCase("parquet") || format.equalsIgnoreCase("parquet-vec")) {
        storageFormatTest = new ParquetStorageFormatTest();
    } else if (format.equalsIgnoreCase("orc")) {
        storageFormatTest = new OrcStorageFormatTest();
    } else {
        throw new IllegalArgumentException("Invalid file format argument: " + format);
    }
    for (int i = 0; i < rows.length; i++) {
        recordWritable[i] = storageFormatTest.serialize(rows[i], oi);
    }
    fs = FileSystem.getLocal(new Configuration());
    writeFile = createTempFile();
    writePath = new Path(writeFile.getPath());
    readFile = createTempFile();
    readPath = new Path(readFile.getPath());
    /*
     * Write random rows that will be used for the read benchmark.
     */
    RecordWriter writer = storageFormatTest.getRecordWriter(readPath);
    storageFormatTest.writeRecords(writer, recordWritable);
    writer.close(false);
}
Also used : Path(org.apache.hadoop.fs.Path) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) Configuration(org.apache.hadoop.conf.Configuration) Setup(org.openjdk.jmh.annotations.Setup)
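A hedged sketch of a write benchmark that could sit next to this setup; the body mirrors the read-file preparation above, but the @Benchmark method name and the cleanup step are illustrative assumptions, not code from ColumnarStorageBench.

// Illustrative JMH method in the same class; assumes the fields initialized in prepareBenchmark().
// Requires: import org.openjdk.jmh.annotations.Benchmark;
@Benchmark
public void write() throws IOException {
    RecordWriter writer = storageFormatTest.getRecordWriter(writePath);
    storageFormatTest.writeRecords(writer, recordWritable);
    writer.close(false);
    // Delete so each invocation starts from a clean slate (assumption, not from the source).
    fs.delete(writePath, false);
}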

Example 13 with RecordWriter

Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project hive by apache.

From the class HiveNullValueSequenceFileOutputFormat, method getHiveRecordWriter.

@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    FileSystem fs = finalOutPath.getFileSystem(jc);
    final SequenceFile.Writer outStream = Utilities.createSequenceWriter(jc, fs, finalOutPath, HiveKey.class, NullWritable.class, isCompressed, progress);
    keyWritable = new HiveKey();
    keyIsText = valueClass.equals(Text.class);
    return new RecordWriter() {

        @Override
        public void write(Writable r) throws IOException {
            if (keyIsText) {
                Text text = (Text) r;
                keyWritable.set(text.getBytes(), 0, text.getLength());
            } else {
                BytesWritable bw = (BytesWritable) r;
                // Once we drop support for old Hadoop versions, change these
                // to getBytes() and getLength() to fix the deprecation warnings.
                // Not worth a shim.
                keyWritable.set(bw.get(), 0, bw.getSize());
            }
            keyWritable.setHashCode(r.hashCode());
            outStream.append(keyWritable, NULL_WRITABLE);
        }

        @Override
        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}
Also used : RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable)
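A small usage sketch, assuming a throwaway local path: each write() call stores the row bytes as the HiveKey and pairs it with the shared NullWritable value.

import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveNullValueSequenceFileOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class NullValueSeqFileSketch {
    public static void main(String[] args) throws Exception {
        JobConf jc = new JobConf();
        Path out = new Path("/tmp/null-value-seqfile"); // hypothetical output location
        RecordWriter writer = new HiveNullValueSequenceFileOutputFormat<>()
                .getHiveRecordWriter(jc, out, Text.class, false, new Properties(), null);
        writer.write(new Text("row-1")); // Text bytes become the HiveKey; the value is always null.
        writer.write(new Text("row-2"));
        writer.close(false);
    }
}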

Example 14 with RecordWriter

Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project hive by apache.

From the class HiveIgnoreKeyTextOutputFormat, method getHiveRecordWriter.

/**
 * Create the final output file and write output row by row. After each row is
 * appended, the configured row separator is appended.
 *
 * @param jc
 *          the job configuration
 * @param outPath
 *          the final output file to be created
 * @param valueClass
 *          the value class used to create the writer
 * @param isCompressed
 *          whether the content is compressed or not
 * @param tableProperties
 *          the table properties of this file's corresponding table
 * @param progress
 *          progress used for status reporting
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    int rowSeparator = 0;
    String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
    try {
        rowSeparator = Byte.parseByte(rowSeparatorString);
    } catch (NumberFormatException e) {
        rowSeparator = rowSeparatorString.charAt(0);
    }
    final int finalRowSeparator = rowSeparator;
    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath, progress), isCompressed);
    return new RecordWriter() {

        @Override
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                outStream.write(tr.getBytes(), 0, tr.getLength());
                outStream.write(finalRowSeparator);
            } else {
                // Binary SerDes always write out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
                outStream.write(finalRowSeparator);
            }
        }

        @Override
        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}
Also used : RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) FileSystem(org.apache.hadoop.fs.FileSystem) OutputStream(java.io.OutputStream) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable)
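The separator handling above deserves a concrete trace. Byte.parseByte succeeds only when line.delim is a numeric string such as "10"; the default "\n" throws NumberFormatException, so the fallback takes charAt(0), which for "\n" is also 10. A standalone sketch of just that logic:

// Standalone copy of the separator resolution, for tracing (not a public Hive API).
public class RowSeparatorSketch {
    static int resolveRowSeparator(String rowSeparatorString) {
        try {
            // Succeeds for numeric table properties such as "10" or "59".
            return Byte.parseByte(rowSeparatorString);
        } catch (NumberFormatException e) {
            // The default "\n" lands here and resolves to the char value 10.
            return rowSeparatorString.charAt(0);
        }
    }

    public static void main(String[] args) {
        System.out.println(resolveRowSeparator("\n")); // 10
        System.out.println(resolveRowSeparator("10")); // 10
        System.out.println(resolveRowSeparator(";"));  // 59
    }
}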

Example 15 with RecordWriter

Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project presto by prestodb.

From the class TestOrcPageSourceMemoryTracking, method createTestFile.

public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat, @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec, List<TestColumn> testColumns, int numRows) throws Exception {
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(CONFIGURATION, tableProperties);
    JobConf jobConf = new JobConf();
    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }
    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);
    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(ImmutableList.copyOf(transform(testColumns, TestColumn::getName)), ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));
        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }
            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
                flushStripe(recordWriter);
            }
        }
    } finally {
        recordWriter.close(false);
    }
    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
Also used : Path(org.apache.hadoop.fs.Path) Writable(org.apache.hadoop.io.Writable) Properties(java.util.Properties) FileSplit(org.apache.hadoop.mapred.FileSplit) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) Slice(io.airlift.slice.Slice) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) JobConf(org.apache.hadoop.mapred.JobConf) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File)
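For context, a hedged sketch of how a FileSplit like the one returned here might be read back; the use of Hive's OrcInputFormat is an assumption for illustration, since the Presto test itself consumes the split through its own page source.

import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class SplitReadSketch {
    // Hypothetical helper: count the rows in an ORC FileSplit such as the one above.
    static long countRows(FileSplit split, JobConf conf) throws Exception {
        RecordReader<NullWritable, OrcStruct> reader =
                new OrcInputFormat().getRecordReader(split, conf, Reporter.NULL);
        NullWritable key = reader.createKey();
        OrcStruct value = reader.createValue();
        long rows = 0;
        while (reader.next(key, value)) {
            rows++;
        }
        reader.close();
        return rows;
    }
}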

Aggregations

RecordWriter (org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter): 24 uses
Writable (org.apache.hadoop.io.Writable): 16 uses
Path (org.apache.hadoop.fs.Path): 12 uses
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 10 uses
BytesWritable (org.apache.hadoop.io.BytesWritable): 8 uses
JobConf (org.apache.hadoop.mapred.JobConf): 8 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 7 uses
SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector): 7 uses
Text (org.apache.hadoop.io.Text): 6 uses
Properties (java.util.Properties): 5 uses
Serializer (org.apache.hadoop.hive.serde2.Serializer): 5 uses
SequenceFile (org.apache.hadoop.io.SequenceFile): 4 uses
Slice (io.airlift.slice.Slice): 3 uses
OutputStream (java.io.OutputStream): 3 uses
LongWritable (org.apache.hadoop.io.LongWritable): 3 uses
ExtendedRecordWriter (com.facebook.presto.hive.RecordFileWriter.ExtendedRecordWriter): 2 uses
DataSize (io.airlift.units.DataSize): 2 uses
File (java.io.File): 2 uses
IOException (java.io.IOException): 2 uses
Field (java.lang.reflect.Field): 2 uses