Use of org.apache.hadoop.io.compress.CompressionCodec in project carbondata by apache.
The class AbstractDFSCarbonFile, method getDataOutputStream.
@Override
public DataOutputStream getDataOutputStream(String path, FileFactory.FileType fileType, int bufferSize, String compressor) throws IOException {
  path = path.replace("\\", "/");
  Path pt = new Path(path);
  OutputStream outputStream;
  if (bufferSize <= 0) {
    outputStream = fs.create(pt);
  } else {
    // Overwrite any existing file, using the requested buffer size.
    outputStream = fs.create(pt, true, bufferSize);
  }
  String codecName = getCodecNameFromCompressor(compressor);
  if (!codecName.isEmpty()) {
    // Wrap the raw stream in a Hadoop compression codec resolved by name.
    CompressionCodec codec = new CompressionCodecFactory(hadoopConf).getCodecByName(codecName);
    outputStream = codec.createOutputStream(outputStream);
  }
  return new DataOutputStream(new BufferedOutputStream(outputStream));
}
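For comparison, here is a minimal standalone sketch of the same codec-by-name technique. The "gzip" alias, the output path, and the class name CodecByNameSketch are illustrative assumptions, not CarbonData code; getCodecByName returns null when the name matches no registered codec, so the wrap is guarded.

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecByNameSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Assumed output path for this sketch.
    OutputStream out = fs.create(new Path("/tmp/data.gz"));
    // Resolve a codec by alias; "gzip" maps to GzipCodec.
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodecByName("gzip");
    if (codec != null) {
      out = codec.createOutputStream(out);
    }
    try (DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(out))) {
      dos.writeUTF("hello");
    }
  }
}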
Use of org.apache.hadoop.io.compress.CompressionCodec in project accumulo by apache.
The class CompressionTest, method testManyStartNotNull.
@Test(timeout = 60 * 1000)
public void testManyStartNotNull() throws IOException, InterruptedException, ExecutionException {
  for (final Algorithm al : Algorithm.values()) {
    if (isSupported.get(al) != null && isSupported.get(al)) {
      // The first call to isSupported should be true.
      Assert.assertTrue(al + " is not supported, but should be", al.isSupported());
      final CompressionCodec codec = al.getCodec();
      Assert.assertNotNull(al + " should not be null", codec);
      ExecutorService service = Executors.newFixedThreadPool(10);
      ArrayList<Future<Boolean>> results = new ArrayList<>();
      // Fetch the codec concurrently to verify getCodec is thread safe.
      for (int i = 0; i < 30; i++) {
        results.add(service.submit(new Callable<Boolean>() {

          @Override
          public Boolean call() throws Exception {
            Assert.assertNotNull(al + " should not be null", al.getCodec());
            return true;
          }
        }));
      }
      service.shutdown();
      Assert.assertNotNull(al + " should not be null", codec);
      while (!service.awaitTermination(1, TimeUnit.SECONDS)) {
        // wait
      }
      for (Future<Boolean> result : results) {
        Assert.assertTrue(al + " resulted in a failed call to getCodec within the thread pool", result.get());
      }
    }
  }
}
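The pattern here (submit many lookups, shut the pool down, poll awaitTermination, then check every Future) generalizes beyond Accumulo. A trimmed sketch under the assumption that a plain Hadoop DefaultCodec lookup stands in for Algorithm.getCodec():

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class ConcurrentCodecSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    ExecutorService service = Executors.newFixedThreadPool(10);
    List<Future<CompressionCodec>> results = new ArrayList<>();
    // Hammer the codec lookup from many threads at once.
    for (int i = 0; i < 30; i++) {
      results.add(service.submit(() -> ReflectionUtils.newInstance(DefaultCodec.class, conf)));
    }
    service.shutdown();
    while (!service.awaitTermination(1, TimeUnit.SECONDS)) {
      // wait for the pool to drain
    }
    for (Future<CompressionCodec> result : results) {
      if (result.get() == null) {
        throw new AssertionError("codec lookup failed in the thread pool");
      }
    }
  }
}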
Use of org.apache.hadoop.io.compress.CompressionCodec in project accumulo by apache.
The class CompressionTest, method testSupport.
@Before
public void testSupport() {
  // we can safely assert that GZ exists by virtue of it being the DefaultCodec
  isSupported.put(Compression.Algorithm.GZ, true);
  Configuration myConf = new Configuration();
  String extClazz = System.getProperty(Compression.Algorithm.CONF_LZO_CLASS);
  String clazz = (extClazz != null) ? extClazz : "org.apache.hadoop.io.compress.LzoCodec";
  try {
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(Class.forName(clazz), myConf);
    Assert.assertNotNull(codec);
    isSupported.put(Compression.Algorithm.LZO, true);
  } catch (ClassNotFoundException e) {
    // that is okay
  }
  extClazz = System.getProperty(Compression.Algorithm.CONF_SNAPPY_CLASS);
  clazz = (extClazz != null) ? extClazz : "org.apache.hadoop.io.compress.SnappyCodec";
  try {
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(Class.forName(clazz), myConf);
    Assert.assertNotNull(codec);
    isSupported.put(Compression.Algorithm.SNAPPY, true);
  } catch (ClassNotFoundException e) {
    // that is okay
  }
}
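The try/catch-ClassNotFoundException probe above is the standard way to discover optional codecs at runtime. A hedged sketch of the same idea as a reusable helper; the method name isCodecAvailable and the hard-coded Snappy class name are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecProbeSketch {
  // Returns true if the named codec class can be loaded and instantiated.
  static boolean isCodecAvailable(String className, Configuration conf) {
    try {
      CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(Class.forName(className), conf);
      return codec != null;
    } catch (ClassNotFoundException e) {
      // The codec jar is not on the classpath; treat as unsupported.
      return false;
    }
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    System.out.println("snappy: " + isCodecAvailable("org.apache.hadoop.io.compress.SnappyCodec", conf));
  }
}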
Use of org.apache.hadoop.io.compress.CompressionCodec in project elephant-bird by twitter.
The class LzoRecordReader, method initialize.
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit split = (FileSplit) genericSplit;
  start_ = split.getStart();
  end_ = start_ + split.getLength();
  final Path file = split.getPath();
  Configuration job = HadoopCompat.getConfiguration(context);
  errorTracker = new InputErrorTracker(job);
  LOG.info("input split: " + file + " " + start_ + ":" + end_);
  FileSystem fs = file.getFileSystem(job);
  CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);
  if (codec == null) {
    throw new IOException("No codec for file " + file + " found, cannot run");
  }
  // Open the file and seek to the start of the split.
  fileIn_ = fs.open(split.getPath());
  // Creates input stream and also reads the file header.
  createInputReader(codec.createInputStream(fileIn_), job);
  if (start_ != 0) {
    fileIn_.seek(start_);
    skipToNextSyncPoint(false);
    start_ = fileIn_.getPos();
    LOG.info("Start is now " + start_);
  } else {
    skipToNextSyncPoint(true);
  }
  pos_ = start_;
}
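The key call is CompressionCodecFactory.getCodec(Path), which picks a codec by file extension and returns null when nothing matches. A minimal sketch, assuming a hypothetical input file name:

import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecFromPathSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path file = new Path("/data/part-00000.gz");
    // The ".gz" extension selects GzipCodec; unknown extensions yield null.
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
    if (codec == null) {
      throw new IOException("No codec for file " + file + " found");
    }
    FileSystem fs = file.getFileSystem(conf);
    // Wrap the raw stream so the caller reads decompressed bytes.
    try (InputStream in = codec.createInputStream(fs.open(file))) {
      System.out.println("first byte: " + in.read());
    }
  }
}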
Use of org.apache.hadoop.io.compress.CompressionCodec in project elephant-bird by twitter.
The class RCFileOutputFormat, method createRCFileWriter.
protected RCFile.Writer createRCFileWriter(TaskAttemptContext job, Text columnMetadata) throws IOException {
  Configuration conf = HadoopCompat.getConfiguration(job);
  // override compression codec if set.
  String codecOverride = conf.get(COMPRESSION_CODEC_CONF);
  if (codecOverride != null) {
    conf.setBoolean("mapred.output.compress", true);
    conf.set("mapred.output.compression.codec", codecOverride);
  }
  CompressionCodec codec = null;
  if (getCompressOutput(job)) {
    Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
    codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
  }
  Metadata metadata = null;
  if (columnMetadata != null) {
    metadata = new Metadata();
    metadata.set(new Text(RCFileUtil.COLUMN_METADATA_PROTOBUF_KEY), columnMetadata);
  }
  String ext = conf.get(EXTENSION_OVERRIDE_CONF, DEFAULT_EXTENSION);
  Path file = getDefaultWorkFile(job, ext.equalsIgnoreCase("none") ? null : ext);
  LOG.info("writing to rcfile " + file.toString());
  return new RCFile.Writer(file.getFileSystem(conf), conf, file, job, metadata, codec);
}
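The override works because getCompressOutput and getOutputCompressorClass read the same keys the method sets. A hedged sketch of that wiring in isolation; the Job setup and codec class name are assumptions, and the mapred.* keys are deprecated in newer Hadoop but still mapped to their mapreduce.* replacements:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecOverrideSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.setBoolean("mapred.output.compress", true);
    conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    Job job = Job.getInstance(conf);
    CompressionCodec codec = null;
    if (FileOutputFormat.getCompressOutput(job)) {
      // Falls back to GzipCodec when no codec class is configured.
      Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
      codec = ReflectionUtils.newInstance(codecClass, conf);
    }
    System.out.println("resolved codec: " + codec);
  }
}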