Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project hive by apache.
In the class Utilities, the method createEmptyBuckets:
/**
 * Check the existence of buckets according to bucket specification. Create empty buckets if
 * needed.
 *
 * @param hconf
 * @param paths A list of empty buckets to create
 * @param conf The definition of the FileSink.
 * @param reporter The mapreduce reporter object
 * @throws HiveException
 * @throws IOException
 */
private static void createEmptyBuckets(Configuration hconf, List<Path> paths, FileSinkDesc conf, Reporter reporter) throws HiveException, IOException {
  JobConf jc;
  if (hconf instanceof JobConf) {
    jc = new JobConf(hconf);
  } else {
    // test code path
    jc = new JobConf(hconf);
  }
  HiveOutputFormat<?, ?> hiveOutputFormat = null;
  Class<? extends Writable> outputClass = null;
  boolean isCompressed = conf.getCompressed();
  TableDesc tableInfo = conf.getTableInfo();
  try {
    Serializer serializer = (Serializer) tableInfo.getDeserializerClass().newInstance();
    serializer.initialize(null, tableInfo.getProperties());
    outputClass = serializer.getSerializedClass();
    hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, conf.getTableInfo());
  } catch (SerDeException e) {
    throw new HiveException(e);
  } catch (InstantiationException e) {
    throw new HiveException(e);
  } catch (IllegalAccessException e) {
    throw new HiveException(e);
  }
  for (Path path : paths) {
    RecordWriter writer = HiveFileFormatUtils.getRecordWriter(jc, hiveOutputFormat, outputClass, isCompressed, tableInfo.getProperties(), path, reporter);
    writer.close(false);
    LOG.info("created empty bucket for enforcing bucketing at " + path);
  }
}
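Every snippet on this page drives the same small contract: FileSinkOperator.RecordWriter exposes write(Writable) and close(boolean abort). As a minimal sketch of the write path that createEmptyBuckets deliberately skips (it only creates and closes), assuming a table whose serializer produces Text rows and reusing the jc, hiveOutputFormat, outputClass, isCompressed, tableInfo, path and reporter values from the method above:

  // Sketch only, not part of the Hive source: write one record before closing.
  RecordWriter writer = HiveFileFormatUtils.getRecordWriter(jc, hiveOutputFormat, outputClass, isCompressed, tableInfo.getProperties(), path, reporter);
  writer.write(new Text("one serialized row"));  // write(Writable) appends a single record
  writer.close(false);                           // close(false) finalizes the file; close(true) would abort it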
Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project presto by prestodb.
In the class TestOrcPageSourceMemoryTracking, the method flushStripe:
private static void flushStripe(RecordWriter recordWriter) {
    try {
        Field writerField = OrcOutputFormat.class.getClassLoader().loadClass(ORC_RECORD_WRITER).getDeclaredField("writer");
        writerField.setAccessible(true);
        Writer writer = (Writer) writerField.get(recordWriter);
        Method flushStripe = WriterImpl.class.getDeclaredMethod("flushStripe");
        flushStripe.setAccessible(true);
        flushStripe.invoke(writer);
    } catch (ReflectiveOperationException e) {
        throw Throwables.propagate(e);
    }
}
Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project presto by prestodb.
In the class HiveZeroRowFileCreator, the method generateZeroRowFile:
private byte[] generateZeroRowFile(ConnectorSession session, HdfsContext hdfsContext, Properties properties, String serDe, String outputFormatName, HiveCompressionCodec compressionCodec) {
    String tmpDirectoryPath = System.getProperty("java.io.tmpdir");
    String tmpFileName = format("presto-hive-zero-row-file-creator-%s-%s", session.getQueryId(), randomUUID().toString());
    java.nio.file.Path tmpFilePath = Paths.get(tmpDirectoryPath, tmpFileName);
    try {
        Path target = new Path(format("file://%s/%s", tmpDirectoryPath, tmpFileName));
        // https://github.com/prestodb/presto/issues/14401 JSON Format reader does not fetch compression from source system
        JobConf conf = configureCompression(hdfsEnvironment.getConfiguration(hdfsContext, target), outputFormatName.equals(HiveStorageFormat.JSON.getOutputFormat()) ? compressionCodec : NONE);
        if (outputFormatName.equals(HiveStorageFormat.PAGEFILE.getOutputFormat())) {
            createEmptyPageFile(dataSinkFactory, session, target.getFileSystem(conf), target);
            return readAllBytes(tmpFilePath);
        }
        // Some serializers such as Avro set a property in the schema.
        initializeSerializer(conf, properties, serDe);
        // The code below is not a try with resources because RecordWriter is not Closeable.
        RecordWriter recordWriter = HiveWriteUtils.createRecordWriter(target, conf, properties, outputFormatName, session);
        recordWriter.close(false);
        return readAllBytes(tmpFilePath);
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    } finally {
        try {
            deleteIfExists(tmpFilePath);
        } catch (IOException e) {
            log.error(e, "Error deleting temporary file: %s", tmpFilePath);
        }
    }
}
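The comment in generateZeroRowFile explains why there is no try-with-resources: FileSinkOperator.RecordWriter does not implement Closeable. A hedged sketch of one workaround is a small adapter class; CloseableRecordWriter below is illustrative, not part of the Presto codebase, and error handling is omitted.

    // Illustrative adapter: maps Closeable.close() onto RecordWriter.close(false)
    // so the writer can participate in try-with-resources.
    class CloseableRecordWriter implements Closeable {
        private final RecordWriter delegate;

        CloseableRecordWriter(RecordWriter delegate) {
            this.delegate = delegate;
        }

        @Override
        public void close() throws IOException {
            delegate.close(false);  // no-abort close; an abort-on-error policy would need extra state
        }
    }

    // Usage: the zero-row case writes nothing, so closing is the only work.
    try (CloseableRecordWriter ignored = new CloseableRecordWriter(HiveWriteUtils.createRecordWriter(target, conf, properties, outputFormatName, session))) {
        // intentionally empty: closing the writer produces the zero-row file
    }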
Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project presto by prestodb.
In the class ParquetRecordWriterUtil, the method createParquetWriter:
public static RecordWriter createParquetWriter(Path target, JobConf conf, Properties properties, boolean compress, ConnectorSession session) throws IOException, ReflectiveOperationException {
    conf.setLong(ParquetOutputFormat.BLOCK_SIZE, getParquetWriterBlockSize(session).toBytes());
    conf.setLong(ParquetOutputFormat.PAGE_SIZE, getParquetWriterPageSize(session).toBytes());
    RecordWriter recordWriter = new MapredParquetOutputFormat().getHiveRecordWriter(conf, target, Text.class, compress, properties, Reporter.NULL);
    Object realWriter = REAL_WRITER_FIELD.get(recordWriter);
    Object internalWriter = INTERNAL_WRITER_FIELD.get(realWriter);
    ParquetFileWriter fileWriter = (ParquetFileWriter) FILE_WRITER_FIELD.get(internalWriter);
    return new ExtendedRecordWriter() {
        private long length;

        @Override
        public long getWrittenBytes() {
            return length;
        }

        @Override
        public void write(Writable value) throws IOException {
            recordWriter.write(value);
            length = fileWriter.getPos();
        }

        @Override
        public void close(boolean abort) throws IOException {
            recordWriter.close(abort);
            if (!abort) {
                length = target.getFileSystem(conf).getFileStatus(target).getLen();
            }
        }
    };
}
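ExtendedRecordWriter, as used here, layers getWrittenBytes() on top of the plain RecordWriter contract so callers can watch the file grow while writing. A hypothetical caller might look like the sketch below; the record value and the surrounding setup are assumed, not taken from Presto, and error handling is omitted.

    // Hypothetical usage sketch of the writer returned above.
    ExtendedRecordWriter writer = (ExtendedRecordWriter) createParquetWriter(target, conf, properties, true, session);
    writer.write(record);                        // after each write, getWrittenBytes() tracks the Parquet file position
    long bytesSoFar = writer.getWrittenBytes();
    writer.close(false);                         // after a normal close, getWrittenBytes() is the final file length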
Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project presto by prestodb.
In the class ParquetTester, the method writeParquetColumn:
private static DataSize writeParquetColumn(JobConf jobConf, File outputFile, CompressionCodecName compressionCodecName, Properties tableProperties, SettableStructObjectInspector objectInspector, Iterator<?>[] valuesByField, Optional<MessageType> parquetSchema, boolean singleLevelArray) throws Exception {
    RecordWriter recordWriter = new TestMapredParquetOutputFormat(parquetSchema, singleLevelArray).getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class, compressionCodecName != UNCOMPRESSED, tableProperties, () -> {
    });
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    while (stream(valuesByField).allMatch(Iterator::hasNext)) {
        for (int field = 0; field < fields.size(); field++) {
            Object value = valuesByField[field].next();
            objectInspector.setStructFieldData(row, fields.get(field), value);
        }
        ParquetHiveSerDe serde = new ParquetHiveSerDe();
        serde.initialize(jobConf, tableProperties, null);
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
    }
    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
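The while loop above consumes one iterator per struct field in lockstep and stops as soon as any column runs out of values. A hedged sketch of how valuesByField might be populated for a two-column schema; the column values are made up for illustration.

    // Illustration only: one Iterator per field, in the same order as
    // objectInspector.getAllStructFieldRefs().
    Iterator<?>[] valuesByField = new Iterator<?>[] {
            ImmutableList.of(1, 2, 3).iterator(),       // values for an int column
            ImmutableList.of("a", "b", "c").iterator()  // values for a string column
    };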