use of org.apache.hadoop.io.compress.CompressionCodec in project presto by prestodb.
the class HiveWriteUtils method createRcFileWriter.
private static RecordWriter createRcFileWriter(Path target, JobConf conf, Properties properties, boolean compress) throws IOException {
    int columns = properties.getProperty(META_TABLE_COLUMNS).split(",").length;
    RCFileOutputFormat.setColumnNumber(conf, columns);
    CompressionCodec codec = null;
    if (compress) {
        codec = ReflectionUtil.newInstance(getOutputCompressorClass(conf, DefaultCodec.class), conf);
    }
    // the lambda is a no-op Progressable, which the RCFile.Writer constructor requires
    RCFile.Writer writer = new RCFile.Writer(target.getFileSystem(conf), conf, target, () -> {
    }, codec);
    return new ExtendedRecordWriter() {
        private long length;

        @Override
        public long getWrittenBytes() {
            return length;
        }

        @Override
        public void write(Writable value) throws IOException {
            writer.append(value);
            length = writer.getLength();
        }

        @Override
        public void close(boolean abort) throws IOException {
            writer.close();
            if (!abort) {
                // on a successful close, report the final on-disk size of the file
                length = target.getFileSystem(conf).getFileStatus(target).getLen();
            }
        }
    };
}
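The codec above is resolved from the JobConf: getOutputCompressorClass returns whatever compressor class has been configured, falling back to DefaultCodec. A minimal, self-contained sketch of that resolution, not Presto code; the choice of GzipCodec and the use of Hadoop's ReflectionUtils (instead of the ReflectionUtil helper in the snippet above) are assumptions for illustration:

import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecSelectionSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // override the default; without this call getOutputCompressorClass falls back to DefaultCodec
        FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
        Class<? extends CompressionCodec> codecClass =
                FileOutputFormat.getOutputCompressorClass(conf, DefaultCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
        System.out.println("Resolved codec: " + codec.getClass().getName());
    }
}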
use of org.apache.hadoop.io.compress.CompressionCodec in project shifu by ShifuML.
the class CombineRecordReader method initializeOne.
public void initializeOne(FileSplit split, TaskAttemptContext context) throws IOException {
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(fileIn,
                    decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job, this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, throw away the first (possibly partial) line,
    // because the previous split reads one extra line past its end in the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
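The branching above hinges on CompressionCodecFactory resolving a codec from the file suffix and on whether that codec is splittable. A small standalone sketch of that lookup (the file names are made up for illustration; which codecs are available depends on the Hadoop build):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;

public class CodecLookupSketch {
    public static void main(String[] args) {
        CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
        for (String name : new String[] { "part-00000", "part-00000.gz", "part-00000.bz2" }) {
            CompressionCodec codec = factory.getCodec(new Path(name));
            if (codec == null) {
                System.out.println(name + ": uncompressed, the reader seeks straight to the split start");
            } else if (codec instanceof SplittableCompressionCodec) {
                System.out.println(name + ": splittable codec " + codec.getClass().getSimpleName());
            } else {
                System.out.println(name + ": non-splittable codec " + codec.getClass().getSimpleName());
            }
        }
    }
}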
use of org.apache.hadoop.io.compress.CompressionCodec in project shifu by ShifuML.
the class ShifuFileUtils method readFilePartsIntoList.
public static List<String> readFilePartsIntoList(String filePath, SourceType sourceType) throws IOException {
    List<String> lines = new ArrayList<String>();
    FileSystem fs = getFileSystemBySourceType(sourceType);
    FileStatus[] fileStatsArr = getFilePartStatus(filePath, sourceType);
    CompressionCodecFactory compressionFactory = new CompressionCodecFactory(new Configuration());
    for (FileStatus fileStatus : fileStatsArr) {
        InputStream is = null;
        CompressionCodec codec = compressionFactory.getCodec(fileStatus.getPath());
        if (codec != null) {
            is = codec.createInputStream(fs.open(fileStatus.getPath()));
        } else {
            is = fs.open(fileStatus.getPath());
        }
        lines.addAll(IOUtils.readLines(is));
        IOUtils.closeQuietly(is);
    }
    return lines;
}
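A hypothetical call site for the helper above; the path and the SourceType value are assumptions for illustration, not taken from Shifu:

// Hypothetical usage sketch; "output/eval/scores" and SourceType.HDFS are assumed values.
public static void printEvalScores() throws IOException {
    List<String> lines = ShifuFileUtils.readFilePartsIntoList("output/eval/scores", SourceType.HDFS);
    for (String line : lines) {
        // each element is one text line from one of the part files, already decompressed when needed
        System.out.println(line);
    }
}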
use of org.apache.hadoop.io.compress.CompressionCodec in project shifu by ShifuML.
the class HdfsPartFile method openPartFileAsStream.
private InputStream openPartFileAsStream(FileStatus fileStatus) throws IOException {
    CompressionCodecFactory compressionFactory = new CompressionCodecFactory(new Configuration());
    InputStream is = null;
    FileSystem fs = ShifuFileUtils.getFileSystemBySourceType(sourceType);
    CompressionCodec codec = compressionFactory.getCodec(fileStatus.getPath());
    if (codec != null) {
        is = codec.createInputStream(fs.open(fileStatus.getPath()));
    } else {
        is = fs.open(fileStatus.getPath());
    }
    return is;
}
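Because the returned stream is already wrapped in the matching decompressor when one exists, a caller can read it as plain text. A minimal sketch of how the stream might be consumed inside the same class (the BufferedReader wrapping is an assumption, not code from Shifu):

// Hypothetical consumer sketch; fileStatus is assumed to point at one part file.
try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(openPartFileAsStream(fileStatus), StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
        // plain text regardless of whether the part file was stored compressed
        System.out.println(line);
    }
}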
use of org.apache.hadoop.io.compress.CompressionCodec in project flink by apache.
the class SequenceFileWriterFactory method create.
@Override
public SequenceFileWriter<K, V> create(FSDataOutputStream out) throws IOException {
    org.apache.hadoop.fs.FSDataOutputStream stream = new org.apache.hadoop.fs.FSDataOutputStream(out, null);
    CompressionCodec compressionCodec = getCompressionCodec(serializableHadoopConfig.get(), compressionCodecName);
    SequenceFile.Writer writer = SequenceFile.createWriter(
            serializableHadoopConfig.get(),
            SequenceFile.Writer.stream(stream),
            SequenceFile.Writer.keyClass(keyClass),
            SequenceFile.Writer.valueClass(valueClass),
            SequenceFile.Writer.compression(compressionType, compressionCodec));
    return new SequenceFileWriter<>(writer);
}