Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project storm by apache.
The class SequenceFileBolt, method doPrepare:
@Override
public void doPrepare(Map conf, TopologyContext topologyContext, OutputCollector collector) throws IOException {
    LOG.info("Preparing Sequence File Bolt...");
    if (this.format == null) {
        throw new IllegalStateException("SequenceFormat must be specified.");
    }
    this.fs = FileSystem.get(URI.create(this.fsUrl), hdfsConfig);
    this.codecFactory = new CompressionCodecFactory(hdfsConfig);
}
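The factory cached here is what a writer later consults when it needs a codec instance. A minimal, hedged sketch of that follow-on step (the codec name, output path, and key/value types are illustrative assumptions, not storm's actual configuration):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;

    public class SequenceWriterSketch {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
            // Resolve a codec by its short name from the cached factory, then hand it to the writer.
            CompressionCodec codec = codecFactory.getCodecByName("gzip");
            SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                    SequenceFile.Writer.file(new Path("/tmp/out.seq")),          // illustrative path
                    SequenceFile.Writer.keyClass(LongWritable.class),
                    SequenceFile.Writer.valueClass(Text.class),
                    SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, codec));
            try {
                writer.append(new LongWritable(1L), new Text("hello"));
            } finally {
                writer.close();
            }
        }
    }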
Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project presto by prestodb.
The class TestOrcPageSourceMemoryTracking, method createTestFile:
public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat, @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec, List<TestColumn> testColumns, int numRows) throws Exception {
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(CONFIGURATION, tableProperties);
    JobConf jobConf = new JobConf();
    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }
    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);
    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(ImmutableList.copyOf(transform(testColumns, TestColumn::getName)), ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));
        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }
            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
                flushStripe(recordWriter);
            }
        }
    } finally {
        recordWriter.close(false);
    }
    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
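The getCodecByName lookup above is forgiving about naming: it resolves a codec from its short alias, its simple class name, or its fully qualified class name. A small stand-alone demonstration (not presto code):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;

    public class CodecByNameSketch {
        public static void main(String[] args) {
            CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
            // All three lookups resolve to org.apache.hadoop.io.compress.GzipCodec.
            CompressionCodec byAlias = factory.getCodecByName("gzip");
            CompressionCodec byShortClass = factory.getCodecByName("GzipCodec");
            CompressionCodec byClassName = factory.getCodecByName("org.apache.hadoop.io.compress.GzipCodec");
            System.out.println(byAlias.getClass() == byClassName.getClass());  // true
            System.out.println(byShortClass.getDefaultExtension());            // .gz
        }
    }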
Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project hadoop by apache.
The class CompressionEmulationUtil, method configureCompressionEmulation:
/**
 * Extracts compression/decompression related configuration parameters from
 * the source configuration to the target configuration.
 */
static void configureCompressionEmulation(Configuration source, Configuration target) {
    // enable output compression
    target.setBoolean(FileOutputFormat.COMPRESS, source.getBoolean(FileOutputFormat.COMPRESS, false));
    // set the job output compression codec
    String jobOutputCompressionCodec = source.get(FileOutputFormat.COMPRESS_CODEC);
    if (jobOutputCompressionCodec != null) {
        target.set(FileOutputFormat.COMPRESS_CODEC, jobOutputCompressionCodec);
    }
    // set the job output compression type
    String jobOutputCompressionType = source.get(FileOutputFormat.COMPRESS_TYPE);
    if (jobOutputCompressionType != null) {
        target.set(FileOutputFormat.COMPRESS_TYPE, jobOutputCompressionType);
    }
    // enable map output compression
    target.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, source.getBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false));
    // set the map output compression codecs
    String mapOutputCompressionCodec = source.get(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC);
    if (mapOutputCompressionCodec != null) {
        target.set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, mapOutputCompressionCodec);
    }
    // enable input decompression
    //TODO replace with mapInputBytes and hdfsBytesRead
    Path[] inputs = org.apache.hadoop.mapred.FileInputFormat.getInputPaths(new JobConf(source));
    boolean needsCompressedInput = false;
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(source);
    for (Path input : inputs) {
        CompressionCodec codec = compressionCodecs.getCodec(input);
        if (codec != null) {
            needsCompressedInput = true;
        }
    }
    setInputCompressionEmulationEnabled(target, needsCompressedInput);
}
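getCodec, used in the loop above, matches the input path's file-name suffix against the codecs registered with the Configuration (the io.compression.codecs list, or the defaults discovered on the classpath), returning null when nothing matches. A minimal sketch of that suffix-based detection (file names are illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;

    public class SuffixDetectionSketch {
        public static void main(String[] args) {
            CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
            for (String name : new String[] {"data.gz", "data.bz2", "data.txt"}) {
                CompressionCodec codec = factory.getCodec(new Path(name));
                // Prints GzipCodec, BZip2Codec, then null for the unrecognized suffix.
                System.out.println(name + " -> " + (codec == null ? "null" : codec.getClass().getSimpleName()));
            }
        }
    }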
Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project hadoop by apache.
The class Anonymizer, method createJsonGenerator:
// Creates a JSON generator
private JsonGenerator createJsonGenerator(Configuration conf, Path path) throws IOException {
    FileSystem outFS = path.getFileSystem(conf);
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    OutputStream output;
    Compressor compressor = null;
    if (codec != null) {
        compressor = CodecPool.getCompressor(codec);
        output = codec.createOutputStream(outFS.create(path), compressor);
    } else {
        output = outFS.create(path);
    }
    JsonGenerator outGen = outFactory.createGenerator(output, JsonEncoding.UTF8);
    outGen.useDefaultPrettyPrinter();
    return outGen;
}
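The read side of this pattern is symmetric: resolve the codec from the path, borrow a Decompressor from CodecPool, and wrap the raw stream with createInputStream. A hedged sketch under the assumption of a gzip-suffixed trace file (the path is illustrative):

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.compress.CodecPool;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;
    import org.apache.hadoop.io.compress.Decompressor;

    public class CompressedJsonReaderSketch {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            Path path = new Path("/tmp/trace.json.gz");  // illustrative path
            FileSystem fs = path.getFileSystem(conf);
            CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
            InputStream in;
            Decompressor decompressor = null;
            if (codec != null) {
                decompressor = CodecPool.getDecompressor(codec);
                in = codec.createInputStream(fs.open(path), decompressor);
            } else {
                in = fs.open(path);
            }
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
                System.out.println(reader.readLine());
            } finally {
                // Pooled resources must be returned once the stream is done.
                if (decompressor != null) {
                    CodecPool.returnDecompressor(decompressor);
                }
            }
        }
    }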
Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project carbondata by apache.
The class FileFactory, method getDataInputStream:
public static DataInputStream getDataInputStream(String path, FileType fileType, int bufferSize) throws IOException {
    path = path.replace("\\", "/");
    boolean gzip = path.endsWith(".gz");
    boolean bzip2 = path.endsWith(".bz2");
    InputStream stream;
    switch (fileType) {
        case LOCAL:
            path = getUpdatedFilePath(path, fileType);
            if (gzip) {
                stream = new GZIPInputStream(new FileInputStream(path));
            } else if (bzip2) {
                stream = new BZip2CompressorInputStream(new FileInputStream(path));
            } else {
                stream = new FileInputStream(path);
            }
            break;
        case HDFS:
        case ALLUXIO:
        case VIEWFS:
            Path pt = new Path(path);
            FileSystem fs = pt.getFileSystem(configuration);
            if (bufferSize == -1) {
                stream = fs.open(pt);
            } else {
                stream = fs.open(pt, bufferSize);
            }
            String codecName = null;
            if (gzip) {
                codecName = GzipCodec.class.getName();
            } else if (bzip2) {
                codecName = BZip2Codec.class.getName();
            }
            if (null != codecName) {
                CompressionCodecFactory ccf = new CompressionCodecFactory(configuration);
                CompressionCodec codec = ccf.getCodecByClassName(codecName);
                stream = codec.createInputStream(stream);
            }
            break;
        default:
            throw new UnsupportedOperationException("unsupported file system");
    }
    return new DataInputStream(new BufferedInputStream(stream));
}
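Because the factory can infer the codec from the path's suffix, the explicit .gz/.bz2 checks above could also be collapsed into a single getCodec call, with null mapping onto the uncompressed branch. A hedged sketch of that alternative (the helper name and signature are illustrative, not carbondata's API):

    import java.io.BufferedInputStream;
    import java.io.DataInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;

    public class FactoryBasedInputSketch {
        // Opens a file on a Hadoop FileSystem, transparently decompressing it
        // whenever the factory recognizes the file-name suffix.
        static DataInputStream open(Configuration conf, String pathStr) throws IOException {
            Path path = new Path(pathStr);
            FileSystem fs = path.getFileSystem(conf);
            InputStream stream = fs.open(path);
            CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
            if (codec != null) {
                stream = codec.createInputStream(stream);
            }
            return new DataInputStream(new BufferedInputStream(stream));
        }
    }

This variant also picks up any codec registered via io.compression.codecs, rather than being limited to the two hard-coded suffixes.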