Use of org.apache.hadoop.io.compress.CompressionCodec in project presto by prestodb.
The class TestOrcBatchPageSourceMemoryTracking, method createTestFile.
public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat, Serializer serializer, String compressionCodec, List<TestColumn> testColumns, int numRows, int stripeRows)
        throws Exception
{
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serializer.initialize(CONFIGURATION, tableProperties);

    JobConf jobConf = new JobConf();
    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);
    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));

        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }

            Writable record = serializer.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % stripeRows == stripeRows - 1) {
                flushStripe(recordWriter);
            }
        }
    }
    finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
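The only part of this test helper that touches CompressionCodec is the name-based lookup and the two JobConf keys. Below is a minimal standalone sketch of just that step; the class name CodecLookupSketch is illustrative, and it assumes the COMPRESS_CODEC and COMPRESS_TYPE constants used above are the new-API FileOutputFormat keys.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public final class CodecLookupSketch {
    // Resolve a codec by alias (e.g. "gzip") or class name and record it in the JobConf,
    // mirroring the compression branch of createTestFile above. getCodecByName returns
    // null for an unknown name, which this sketch (like the original) does not guard against.
    public static JobConf jobConfWithCodec(String compressionCodec) {
        JobConf jobConf = new JobConf();
        if (compressionCodec != null) {
            CompressionCodec codec =
                    new CompressionCodecFactory(new Configuration()).getCodecByName(compressionCodec);
            jobConf.set(FileOutputFormat.COMPRESS_CODEC, codec.getClass().getName());
            jobConf.set(FileOutputFormat.COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
        }
        return jobConf;
    }
}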
Use of org.apache.hadoop.io.compress.CompressionCodec in project flink by apache.
The class HiveWriterFactory, method createRecordWriter.
/**
 * Create a {@link RecordWriter} from path.
 */
public RecordWriter createRecordWriter(Path path) {
    try {
        checkInitialize();
        JobConf conf = new JobConf(confWrapper.conf());

        if (isCompressed) {
            String codecStr = conf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
            if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
                //noinspection unchecked
                Class<? extends CompressionCodec> codec =
                        (Class<? extends CompressionCodec>)
                                Class.forName(codecStr, true, Thread.currentThread().getContextClassLoader());
                FileOutputFormat.setOutputCompressorClass(conf, codec);
            }
            String typeStr = conf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
            if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
                SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
                SequenceFileOutputFormat.setOutputCompressionType(conf, style);
            }
        }

        return hiveShim.getHiveRecordWriter(
                conf, hiveOutputFormatClz, recordSerDe.getSerializedClass(), isCompressed, tableProperties, path);
    } catch (Exception e) {
        throw new FlinkHiveException(e);
    }
}
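When isCompressed is true, the factory only reacts to the two Hive intermediate-compression settings read above. The following small sketch shows a JobConf that would exercise both branches; the GzipCodec and BLOCK choices are placeholders, not values the factory requires.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.JobConf;

public final class IntermediateCompressionConf {
    // Populate the two Hive settings that createRecordWriter consults: the intermediate
    // compression codec class name and the SequenceFile compression type.
    public static JobConf withIntermediateCompression(JobConf conf) {
        conf.set(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname,
                "org.apache.hadoop.io.compress.GzipCodec");
        conf.set(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname,
                SequenceFile.CompressionType.BLOCK.name());
        return conf;
    }
}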
Use of org.apache.hadoop.io.compress.CompressionCodec in project flink by apache.
The class SequenceFileWriterFactory, method getCompressionCodec.
private CompressionCodec getCompressionCodec(Configuration conf, String compressionCodecName) {
    checkNotNull(conf);
    checkNotNull(compressionCodecName);

    if (compressionCodecName.equals(NO_COMPRESSION)) {
        return null;
    }

    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodecByName(compressionCodecName);
    if (codec == null) {
        throw new RuntimeException("Codec " + compressionCodecName + " not found.");
    }
    return codec;
}
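A caller typically feeds the resolved codec into a SequenceFile.Writer. The sketch below shows that wiring under stated assumptions; it is not code from SequenceFileWriterFactory, and the LongWritable/Text key-value types and BLOCK compression type are illustrative choices.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public final class SequenceFileWriterSketch {
    // Open a SequenceFile.Writer with a codec resolved by name, falling back to no
    // compression when the lookup yields null (as getCompressionCodec does for NO_COMPRESSION).
    public static SequenceFile.Writer open(Configuration conf, Path path, String codecName) throws IOException {
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodecByName(codecName);
        SequenceFile.Writer.Option compression = codec == null
                ? SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE)
                : SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, codec);
        return SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(path),
                SequenceFile.Writer.keyClass(LongWritable.class),
                SequenceFile.Writer.valueClass(Text.class),
                compression);
    }
}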
Use of org.apache.hadoop.io.compress.CompressionCodec in project hbase by apache.
The class CellBlockBuilder, method encodeCellsTo.
private void encodeCellsTo(OutputStream os, CellScanner cellScanner, Codec codec, CompressionCodec compressor) throws IOException {
  Compressor poolCompressor = null;
  try {
    if (compressor != null) {
      if (compressor instanceof Configurable) {
        ((Configurable) compressor).setConf(this.conf);
      }
      poolCompressor = CodecPool.getCompressor(compressor);
      os = compressor.createOutputStream(os, poolCompressor);
    }
    Codec.Encoder encoder = codec.getEncoder(os);
    while (cellScanner.advance()) {
      encoder.write(cellScanner.current());
    }
    encoder.flush();
  } catch (BufferOverflowException | IndexOutOfBoundsException e) {
    throw new DoNotRetryIOException(e);
  } finally {
    os.close();
    if (poolCompressor != null) {
      CodecPool.returnCompressor(poolCompressor);
    }
  }
}
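The surrounding pattern here is generic Hadoop usage rather than anything HBase-specific: borrow a Compressor from CodecPool, wrap the stream with it, and return the compressor in a finally block. A minimal sketch of that pattern compressing a byte array follows; the DefaultCodec choice and the class name are illustrative.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;

public final class PooledCompressionSketch {
    // Compress a byte array with a pooled Compressor: borrow, wrap, write, close, return.
    public static byte[] compress(byte[] data) throws IOException {
        Configuration conf = new Configuration();
        CompressionCodec codec = ReflectionUtils.newInstance(DefaultCodec.class, conf);
        Compressor pooled = CodecPool.getCompressor(codec);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (OutputStream out = codec.createOutputStream(buffer, pooled)) {
            out.write(data);
        } finally {
            if (pooled != null) {
                CodecPool.returnCompressor(pooled);
            }
        }
        return buffer.toByteArray();
    }
}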
Use of org.apache.hadoop.io.compress.CompressionCodec in project hbase by apache.
The class Compression, method buildCodec.
/**
 * Load a codec implementation for an algorithm using the supplied configuration.
 * @param conf the configuration to use
 * @param algo the algorithm to implement
 */
private static CompressionCodec buildCodec(final Configuration conf, final Algorithm algo) {
  try {
    String codecClassName = conf.get(algo.confKey, algo.confDefault);
    if (codecClassName == null) {
      throw new RuntimeException("No codec configured for " + algo.confKey);
    }
    Class<?> codecClass = getClassLoaderForCodec().loadClass(codecClassName);
    CompressionCodec codec =
        (CompressionCodec) ReflectionUtils.newInstance(codecClass, new Configuration(conf));
    LOG.info("Loaded codec {} for compression algorithm {}",
        codec.getClass().getCanonicalName(), algo.name());
    return codec;
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  }
}
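Stripped of the HBase Algorithm enum, the lookup amounts to: read a class name from configuration with a default, load the class, and instantiate it through ReflectionUtils. The sketch below makes those assumptions explicit, taking confKey and confDefault as plain parameters and using the thread context class loader in place of HBase's getClassLoaderForCodec().

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.ReflectionUtils;

public final class CodecFromConfSketch {
    // Read a codec class name from configuration (with a default), load it, and instantiate it
    // with a copy of the configuration so Configurable codecs pick up their settings.
    public static CompressionCodec load(Configuration conf, String confKey, String confDefault) {
        String codecClassName = conf.get(confKey, confDefault);
        if (codecClassName == null) {
            throw new RuntimeException("No codec configured for " + confKey);
        }
        try {
            Class<?> codecClass =
                    Class.forName(codecClassName, true, Thread.currentThread().getContextClassLoader());
            return (CompressionCodec) ReflectionUtils.newInstance(codecClass, new Configuration(conf));
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    }
}

In the HBase version, confKey and confDefault come from the Algorithm enum entry, and a dedicated codec class loader is used instead of the context class loader.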