Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.
The class TextOutputFormat, method getRecordWriter.
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get(SEPERATOR, "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    if (isCompressed) {
        return new LineRecordWriter<>(new DataOutputStream(codec.createOutputStream(fileOut)), keyValueSeparator);
    } else {
        return new LineRecordWriter<>(fileOut, keyValueSeparator);
    }
}
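
For orientation, here is a minimal driver-side sketch (not from the project; the class name, output path, and separator value are assumptions) of the configuration this getRecordWriter() reacts to: setCompressOutput() is what getCompressOutput(job) reads back, and setOutputCompressorClass() supplies the codec class for which GzipCodec.class above is only the fallback default.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class CompressedTextOutputSetup {
    // Configures a job so the getRecordWriter() above takes its compressed branch.
    static void configureCompressedOutput(Job job, Path out) {
        job.setOutputFormatClass(TextOutputFormat.class);
        // Read back by getCompressOutput(job)
        FileOutputFormat.setCompressOutput(job, true);
        // Read back by getOutputCompressorClass(job, GzipCodec.class)
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        // Consumed via conf.get(SEPERATOR, "\t"); key name per Hadoop 2.x
        job.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");
        FileOutputFormat.setOutputPath(job, out);
    }
}

With compression enabled, the writer wraps fileOut in codec.createOutputStream() and appends the codec's default extension (for GzipCodec, ".gz") to the work file name.
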
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.
The class FSImageCompression, method createCompression.
/**
 * Create a compression instance using the codec specified by
 * <code>codecClassName</code>.
 */
static FSImageCompression createCompression(Configuration conf, String codecClassName) throws IOException {
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodecByClassName(codecClassName);
    if (codec == null) {
        throw new IOException("Not a supported codec: " + codecClassName);
    }
    return new FSImageCompression(codec);
}
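
The same lookup-and-validate pattern can be exercised standalone. A sketch (assumed, not from the project; DefaultCodec is one of the codecs CompressionCodecFactory registers by default) that resolves a codec by class name and compresses a small buffer:

import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionOutputStream;

public class CodecByNameDemo {
    public static void main(String[] args) throws Exception {
        CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
        // Same lookup-and-validate pattern as createCompression() above
        String codecClassName = "org.apache.hadoop.io.compress.DefaultCodec";
        CompressionCodec codec = factory.getCodecByClassName(codecClassName);
        if (codec == null) {
            throw new IllegalArgumentException("Not a supported codec: " + codecClassName);
        }
        ByteArrayOutputStream raw = new ByteArrayOutputStream();
        try (CompressionOutputStream out = codec.createOutputStream(raw)) {
            out.write("hello, codecs".getBytes(StandardCharsets.UTF_8));
        }
        System.out.println("Compressed to " + raw.size() + " bytes");
    }
}
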
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop-pcap by RIPE-NCC.
The class PcapInputFormat, method initPcapRecordReader.
public static PcapRecordReader initPcapRecordReader(Path path, long start, long length, Reporter reporter, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream baseStream = fs.open(path);
    DataInputStream stream = baseStream;
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(path);
    if (codec != null)
        stream = new DataInputStream(codec.createInputStream(stream));
    PcapReader reader = initPcapReader(stream, conf);
    return new PcapRecordReader(reader, start, length, baseStream, stream, reporter);
}
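
Both variants of initPcapRecordReader (the old mapred API above, the new mapreduce API below) resolve the codec purely from the file suffix. A standalone sketch (file names are assumed examples) of how CompressionCodecFactory.getCodec(Path) maps extensions to codecs:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecResolutionDemo {
    public static void main(String[] args) {
        CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
        for (String name : new String[] { "dump.pcap.gz", "dump.pcap.bz2", "dump.pcap" }) {
            CompressionCodec codec = factory.getCodec(new Path(name));
            // Prints the resolved codec class, or "none" for an unknown suffix
            System.out.println(name + " -> " + (codec == null ? "none" : codec.getClass().getName()));
        }
    }
}
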
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop-pcap by RIPE-NCC.
The class PcapInputFormat, method initPcapRecordReader.
public static PcapRecordReader initPcapRecordReader(Path path, long start, long length, TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream baseStream = fs.open(path);
    DataInputStream stream = baseStream;
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(path);
    if (codec != null)
        stream = new DataInputStream(codec.createInputStream(stream));
    PcapReader reader = initPcapReader(stream, conf);
    return new PcapRecordReader(reader, start, length, baseStream, stream, context);
}
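
The wrap-or-passthrough logic shared by both methods can be factored into a small helper. A sketch under assumed names (the class and method are hypothetical, not part of hadoop-pcap):

import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class OpenMaybeCompressed {
    // Same pattern as both initPcapRecordReader variants: pick the codec by
    // file extension and fall back to the raw stream when none matches.
    static InputStream open(Path path, Configuration conf) throws IOException {
        FileSystem fs = path.getFileSystem(conf);
        InputStream in = fs.open(path);
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
        return codec != null ? codec.createInputStream(in) : in;
    }
}
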
Use of org.apache.hadoop.io.compress.CompressionCodec in project jena by apache.
The class AbstractLineBasedNodeTupleReader, method initialize.
@Override
public final void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    LOG.debug("initialize({}, {})", genericSplit, context);
    // Assuming file split
    if (!(genericSplit instanceof FileSplit))
        throw new IOException("This record reader only supports FileSplit inputs");
    FileSplit split = (FileSplit) genericSplit;
    // Intermediate: RDFParser, but we need to make an Iterator<Quad/Triple>
    LabelToNode labelToNode = RdfIOUtils.createLabelToNode(context, split.getPath());
    maker = new ParserProfileStd(RiotLib.factoryRDF(labelToNode), ErrorHandlerFactory.errorHandlerStd, IRIResolver.create(), PrefixMapFactory.createForInput(), null, true, false);
    Configuration config = context.getConfiguration();
    this.ignoreBadTuples = config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true);
    if (this.ignoreBadTuples)
        LOG.warn("Configured to ignore bad tuples; parsing errors will be logged and the bad line skipped but no errors will be thrown. Consider setting {} to false to disable this behaviour", RdfIOConstants.INPUT_IGNORE_BAD_TUPLES);
    // Figure out what portion of the file to read
    this.maxLineLength = config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    long totalLength = file.getFileSystem(context.getConfiguration()).getFileStatus(file).getLen();
    compressionCodecs = new CompressionCodecFactory(config);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    LOG.info(String.format("Got split with start %d and length %d for file with total length of %d", new Object[] { start, split.getLength(), totalLength }));
    // Open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(config);
    FSDataInputStream fileIn = fs.open(file);
    boolean skipFirstLine = false;
    if (codec != null) {
        // Add 1 and verify we got a complete split
        if (totalLength > split.getLength() + 1)
            throw new IOException("This record reader can only be used with compressed input where the split covers the whole file");
        in = new LineReader(codec.createInputStream(fileIn), config);
        estLength = end;
        end = Long.MAX_VALUE;
    } else {
        // Uncompressed input
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, config);
    }
    // NLineInputFormat will provide the split information to use
    if (skipFirstLine) {
        start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
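
The codec branch above matters because a compression stream cannot seek to a split offset, which is why the reader insists that a compressed split cover the whole file. A standalone sketch of the same LineReader setup (assumed, not from Jena; it just counts lines instead of parsing RDF tuples):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.LineReader;

public class CompressedLineCount {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path(args[0]);
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream fileIn = fs.open(file);
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
        // As in initialize() above: wrap the stream when a codec matches the
        // file extension; a plain file could instead seek to a split offset.
        LineReader in = (codec != null)
                ? new LineReader(codec.createInputStream(fileIn), conf)
                : new LineReader(fileIn, conf);
        Text line = new Text();
        long lines = 0;
        while (in.readLine(line) > 0) {
            lines++;
        }
        in.close();
        System.out.println(lines + " lines");
    }
}
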