use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project hive by apache.
the class Utilities method createEmptyFile.
@SuppressWarnings({ "rawtypes", "unchecked" })
private static Path createEmptyFile(Path hiveScratchDir, HiveOutputFormat outFileFormat, JobConf job, Properties props, boolean dummyRow) throws IOException, InstantiationException, IllegalAccessException {
// create a dummy empty file in a new directory
String newDir = hiveScratchDir + Path.SEPARATOR + UUID.randomUUID().toString();
Path newPath = new Path(newDir);
FileSystem fs = newPath.getFileSystem(job);
fs.mkdirs(newPath);
// Qualify the path against the file system. The user configured path might contain default port which is skipped
// in the file status. This makes sure that all paths which goes into PathToPartitionInfo are always listed status
// file path.
newPath = fs.makeQualified(newPath);
String newFile = newDir + Path.SEPARATOR + "emptyFile";
Path newFilePath = new Path(newFile);
RecordWriter recWriter = outFileFormat.getHiveRecordWriter(job, newFilePath, Text.class, false, props, null);
if (dummyRow) {
// empty files are omitted at CombineHiveInputFormat.
// for meta-data only query, it effectively makes partition columns disappear..
// this could be fixed by other methods, but this seemed to be the most easy (HIVEV-2955)
// written via HiveIgnoreKeyTextOutputFormat
recWriter.write(new Text("empty"));
}
recWriter.close(false);
return StringInternUtils.internUriStringsInPath(newPath);
}
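A minimal, self-contained sketch of the same pattern (obtaining a RecordWriter from a HiveOutputFormat and writing a single dummy row), assuming HiveIgnoreKeyTextOutputFormat as the concrete format since that is what the comment above names; the DummyFileExample class and the writeDummyFile helper are hypothetical names, not part of Utilities:

import java.io.IOException;
import java.util.Properties;
import java.util.UUID;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class DummyFileExample {
  // Hypothetical helper: writes a one-row text file under scratchDir and returns the qualified directory path.
  static Path writeDummyFile(Path scratchDir, JobConf job) throws IOException {
    Path dir = new Path(scratchDir, UUID.randomUUID().toString());
    FileSystem fs = dir.getFileSystem(job);
    fs.mkdirs(dir);
    Path file = new Path(dir, "emptyFile");
    RecordWriter writer = new HiveIgnoreKeyTextOutputFormat<Text, Text>()
        .getHiveRecordWriter(job, file, Text.class, false, new Properties(), null);
    writer.write(new Text("empty")); // one dummy row so CombineHiveInputFormat does not drop the file
    writer.close(false);
    return fs.makeQualified(dir);
  }
}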
use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project hive by apache.
the class ColumnarStorageBench method prepareBenchmark.
/**
 * Initializes resources that will be needed for each of the benchmark tests.
 *
 * @throws SerDeException If it cannot initialize the desired test format.
 * @throws IOException If it cannot write data to temporary files.
 */
@Setup(Level.Trial)
public void prepareBenchmark() throws SerDeException, IOException {
  if (format.equalsIgnoreCase("parquet") || format.equalsIgnoreCase("parquet-vec")) {
    storageFormatTest = new ParquetStorageFormatTest();
  } else if (format.equalsIgnoreCase("orc")) {
    storageFormatTest = new OrcStorageFormatTest();
  } else {
    throw new IllegalArgumentException("Invalid file format argument: " + format);
  }
  for (int i = 0; i < rows.length; i++) {
    recordWritable[i] = storageFormatTest.serialize(rows[i], oi);
  }
  fs = FileSystem.getLocal(new Configuration());
  writeFile = createTempFile();
  writePath = new Path(writeFile.getPath());
  readFile = createTempFile();
  readPath = new Path(readFile.getPath());
  /*
   * Write a bunch of random rows that will be used for the read benchmark.
   */
  RecordWriter writer = storageFormatTest.getRecordWriter(readPath);
  storageFormatTest.writeRecords(writer, recordWritable);
  writer.close(false);
}
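A sketch of how the state prepared above might be exercised by a write benchmark. This is not the project's actual benchmark method: it only reuses the fields and calls already shown in prepareBenchmark (storageFormatTest, writePath, recordWritable, fs), assumes JMH's @Benchmark annotation is imported alongside the @Setup/Level already used, and the upfront delete of writePath is an assumption added to keep repeated invocations from failing on an existing file:

@Benchmark
public void writeBenchmarkSketch() throws IOException {
  // Assumption: clear any previous output so the storage format can create writePath fresh.
  fs.delete(writePath, false);
  RecordWriter writer = storageFormatTest.getRecordWriter(writePath);
  storageFormatTest.writeRecords(writer, recordWritable);
  writer.close(false);
}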
use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project hive by apache.
the class HiveNullValueSequenceFileOutputFormat method getHiveRecordWriter.
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass,
    boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
  FileSystem fs = finalOutPath.getFileSystem(jc);
  final SequenceFile.Writer outStream = Utilities.createSequenceWriter(jc, fs, finalOutPath, HiveKey.class,
      NullWritable.class, isCompressed, progress);
  keyWritable = new HiveKey();
  keyIsText = valueClass.equals(Text.class);
  return new RecordWriter() {

    @Override
    public void write(Writable r) throws IOException {
      if (keyIsText) {
        Text text = (Text) r;
        keyWritable.set(text.getBytes(), 0, text.getLength());
      } else {
        BytesWritable bw = (BytesWritable) r;
        // Once we drop support for old Hadoop versions, change these
        // to getBytes() and getLength() to fix the deprecation warnings.
        // Not worth a shim.
        keyWritable.set(bw.get(), 0, bw.getSize());
      }
      keyWritable.setHashCode(r.hashCode());
      outStream.append(keyWritable, NULL_WRITABLE);
    }

    @Override
    public void close(boolean abort) throws IOException {
      outStream.close();
    }
  };
}
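A hedged usage sketch for this output format: every record written becomes the HiveKey, and the value is always NullWritable, which is what makes it suitable for key-only sequence files. The output path below is a made-up local location, the example rows are arbitrary, and passing null for the Progressable mirrors the other snippets on this page:

import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveNullValueSequenceFileOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class NullValueSequenceFileExample {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    Path out = new Path("/tmp/null-value-seqfile"); // hypothetical output location
    RecordWriter writer = new HiveNullValueSequenceFileOutputFormat<Text, Text>()
        .getHiveRecordWriter(job, out, Text.class, false, new Properties(), null);
    // each record written becomes the key; the value stored is always NullWritable
    writer.write(new Text("row-1"));
    writer.write(new Text("row-2"));
    writer.close(false);
  }
}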
use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project hive by apache.
the class HiveIgnoreKeyTextOutputFormat method getHiveRecordWriter.
/**
 * Creates the final output file and writes row by row. After each row is
 * appended, the configured row separator is appended as well.
 *
 * @param jc
 *          the job configuration
 * @param outPath
 *          the final output file to be created
 * @param valueClass
 *          the value class used for creation
 * @param isCompressed
 *          whether the content is compressed or not
 * @param tableProperties
 *          the table properties of this file's corresponding table
 * @param progress
 *          progress used for status reporting
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
    boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
  int rowSeparator = 0;
  String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
  try {
    rowSeparator = Byte.parseByte(rowSeparatorString);
  } catch (NumberFormatException e) {
    rowSeparator = rowSeparatorString.charAt(0);
  }
  final int finalRowSeparator = rowSeparator;
  FileSystem fs = outPath.getFileSystem(jc);
  final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath, progress), isCompressed);
  return new RecordWriter() {

    @Override
    public void write(Writable r) throws IOException {
      if (r instanceof Text) {
        Text tr = (Text) r;
        outStream.write(tr.getBytes(), 0, tr.getLength());
        outStream.write(finalRowSeparator);
      } else {
        // Binary SerDes always write out BytesWritable
        BytesWritable bw = (BytesWritable) r;
        outStream.write(bw.get(), 0, bw.getSize());
        outStream.write(finalRowSeparator);
      }
    }

    @Override
    public void close(boolean abort) throws IOException {
      outStream.close();
    }
  };
}
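A short usage sketch for this writer, assuming default table properties apart from an explicit line.delim; the IgnoreKeyTextExample class and the output path are hypothetical. Note how the separator lookup above first tries Byte.parseByte, so the property may be given either as a numeric byte value (for example "10") or as a literal one-character string:

import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class IgnoreKeyTextExample {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    Properties tableProperties = new Properties();
    tableProperties.setProperty(serdeConstants.LINE_DELIM, "\n"); // not numeric, so charAt(0) is used above
    Path out = new Path("/tmp/ignore-key-text-out"); // hypothetical output location
    RecordWriter writer = new HiveIgnoreKeyTextOutputFormat<Text, Text>()
        .getHiveRecordWriter(job, out, Text.class, false, tableProperties, null);
    writer.write(new Text("a,b,c")); // row bytes, then the configured separator
    writer.close(false);
  }
}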
use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project presto by prestodb.
the class TestOrcPageSourceMemoryTracking method createTestFile.
public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat,
    @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec, List<TestColumn> testColumns, int numRows)
    throws Exception {
  // filter out partition keys, which are not written to the file
  testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));
  Properties tableProperties = new Properties();
  tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
  tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
  serDe.initialize(CONFIGURATION, tableProperties);
  JobConf jobConf = new JobConf();
  if (compressionCodec != null) {
    CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
    jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
    jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
  }
  RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);
  try {
    SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
        ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
        ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
      for (int i = 0; i < testColumns.size(); i++) {
        Object writeValue = testColumns.get(i).getWriteValue();
        if (writeValue instanceof Slice) {
          writeValue = ((Slice) writeValue).getBytes();
        }
        objectInspector.setStructFieldData(row, fields.get(i), writeValue);
      }
      Writable record = serDe.serialize(row, objectInspector);
      recordWriter.write(record);
      if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
        flushStripe(recordWriter);
      }
    }
  } finally {
    recordWriter.close(false);
  }
  Path path = new Path(filePath);
  path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
  File file = new File(filePath);
  return new FileSplit(path, 0, file.length(), new String[0]);
}
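The try/finally discipline around the RecordWriter generalizes beyond this test. Below is a minimal sketch of that pattern in which, unlike the snippet above (which always passes false), the abort flag is wired to whether the write loop succeeded. The RecordWriterUtil class and writeAll helper are illustrative names only; the two writers shown earlier on this page simply close their streams regardless of the flag, but other implementations may use it to skip flushing or discard partial output:

import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.io.Writable;

public final class RecordWriterUtil {
  private RecordWriterUtil() {
  }

  // Hypothetical helper: writes all records and closes with abort=true if anything failed.
  public static void writeAll(RecordWriter writer, Iterable<? extends Writable> records) throws IOException {
    boolean success = false;
    try {
      for (Writable record : records) {
        writer.write(record);
      }
      success = true;
    } finally {
      writer.close(!success);
    }
  }
}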