Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by Apache.
The class SequenceFileOutputFormat, method getRecordWriter.
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job,
                                          String name, Progressable progress)
    throws IOException {
  // get the path of the temporary output file
  Path file = FileOutputFormat.getTaskOutputPath(job, name);
  FileSystem fs = file.getFileSystem(job);
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(job)) {
    // find the kind of compression to do
    compressionType = getOutputCompressionType(job);
    // find the right codec
    Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  final SequenceFile.Writer out = SequenceFile.createWriter(
      fs, job, file, job.getOutputKeyClass(), job.getOutputValueClass(),
      compressionType, codec, progress);
  return new RecordWriter<K, V>() {
    public void write(K key, V value) throws IOException {
      out.append(key, value);
    }
    public void close(Reporter reporter) throws IOException {
      out.close();
    }
  };
}
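For context, a minimal driver-side sketch of the configuration this method reads back. It assumes the classic org.apache.hadoop.mapred API; the class name CompressedSeqFileJobSetup and the choice of GzipCodec are illustrative, not part of the source above.

import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

public class CompressedSeqFileJobSetup {
  public static JobConf configure() {
    JobConf job = new JobConf();
    job.setOutputFormat(SequenceFileOutputFormat.class);
    // makes getCompressOutput(job) in the record writer return true
    FileOutputFormat.setCompressOutput(job, true);
    // compress whole blocks of records rather than each value individually
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    // the class that getOutputCompressorClass(job, DefaultCodec.class) resolves
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    return job;
  }
}

With this configuration the writer above produces a block-compressed SequenceFile; without the setCompressOutput call it falls back to CompressionType.NONE.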
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by Apache.
The class MapFileOutputFormat, method getRecordWriter.
public RecordWriter<WritableComparable, Writable> getRecordWriter(FileSystem ignored,
    JobConf job, String name, Progressable progress) throws IOException {
  // get the path of the temporary output file
  Path file = FileOutputFormat.getTaskOutputPath(job, name);
  FileSystem fs = file.getFileSystem(job);
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(job)) {
    // find the kind of compression to do
    compressionType = SequenceFileOutputFormat.getOutputCompressionType(job);
    // find the right codec
    Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  // the progress reporter is passed through to the MapFile writer
  final MapFile.Writer out = new MapFile.Writer(job, fs, file.toString(),
      job.getOutputKeyClass().asSubclass(WritableComparable.class),
      job.getOutputValueClass().asSubclass(Writable.class),
      compressionType, codec, progress);
  return new RecordWriter<WritableComparable, Writable>() {
    public void write(WritableComparable key, Writable value) throws IOException {
      out.append(key, value);
    }
    public void close(Reporter reporter) throws IOException {
      out.close();
    }
  };
}
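The writer above produces one MapFile per reduce task. For the read side, a hedged sketch (the class name MapFileLookup is invented, and it assumes Text keys and values and a job that used the default hash partitioning) of looking a key up across the job's output directory:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.lib.HashPartitioner;

public class MapFileLookup {
  // look up a single key across the MapFiles a job wrote to outputDir
  public static Writable lookup(Configuration conf, Path outputDir,
                                Text key, Text value) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, outputDir, conf);
    try {
      // route the key to the same partition the job hashed it into
      return MapFileOutputFormat.getEntry(readers,
          new HashPartitioner<Text, Text>(), key, value);
    } finally {
      for (MapFile.Reader reader : readers) {
        reader.close();
      }
    }
  }
}

getEntry fills value in place and returns it, or returns null if the key is absent.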
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by Apache.
The class TestSequenceFile, method testSequenceFileMetadata.
/** Unit tests for SequenceFile metadata. */
@Test
public void testSequenceFileMetadata() throws Exception {
  LOG.info("Testing SequenceFile with metadata");
  int count = 1024 * 10;
  CompressionCodec codec = new DefaultCodec();
  Path file = new Path(GenericTestUtils.getTempPath("test.seq.metadata"));
  Path sortedFile = new Path(GenericTestUtils.getTempPath("test.sorted.seq.metadata"));
  Path recordCompressedFile = new Path(GenericTestUtils.getTempPath("test.rc.seq.metadata"));
  Path blockCompressedFile = new Path(GenericTestUtils.getTempPath("test.bc.seq.metadata"));
  FileSystem fs = FileSystem.getLocal(conf);
  SequenceFile.Metadata theMetadata = new SequenceFile.Metadata();
  theMetadata.set(new Text("name_1"), new Text("value_1"));
  theMetadata.set(new Text("name_2"), new Text("value_2"));
  theMetadata.set(new Text("name_3"), new Text("value_3"));
  theMetadata.set(new Text("name_4"), new Text("value_4"));
  int seed = new Random().nextInt();
  try {
    // SequenceFile.Writer
    writeMetadataTest(fs, count, seed, file, CompressionType.NONE, null, theMetadata);
    SequenceFile.Metadata aMetadata = readMetadata(fs, file);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata does not match: " + 1);
    }
    // SequenceFile.RecordCompressWriter
    writeMetadataTest(fs, count, seed, recordCompressedFile,
        CompressionType.RECORD, codec, theMetadata);
    aMetadata = readMetadata(fs, recordCompressedFile);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata does not match: " + 2);
    }
    // SequenceFile.BlockCompressWriter
    writeMetadataTest(fs, count, seed, blockCompressedFile,
        CompressionType.BLOCK, codec, theMetadata);
    aMetadata = readMetadata(fs, blockCompressedFile);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata does not match: " + 3);
    }
    // SequenceFile.Sorter
    sortMetadataTest(fs, file, sortedFile, theMetadata);
    // verify the file the sorter just wrote, not the record-compressed
    // file already checked above
    aMetadata = readMetadata(fs, sortedFile);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata does not match: " + 4);
    }
  } finally {
    fs.close();
  }
  LOG.info("Successfully tested SequenceFile with metadata");
}
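For reference, a standalone sketch of the round trip the test exercises: writing metadata into a SequenceFile header and reading it back. The class name SeqFileMetadataRoundTrip, the path, and the key/value choices are illustrative; the sketch uses the same (deprecated) createWriter overload that accepts a Metadata argument.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class SeqFileMetadataRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path("/tmp/roundtrip.seq");
    SequenceFile.Metadata metadata = new SequenceFile.Metadata();
    metadata.set(new Text("owner"), new Text("example"));
    // the metadata is written into the file header along with the records
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path,
        IntWritable.class, Text.class, CompressionType.RECORD,
        new DefaultCodec(), null, metadata);
    try {
      writer.append(new IntWritable(1), new Text("one"));
    } finally {
      writer.close();
    }
    // the header metadata comes back as soon as the reader is opened
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
      System.out.println(reader.getMetadata().get(new Text("owner"))); // example
    } finally {
      reader.close();
    }
  }
}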
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by Apache.
The class TestSequenceFile, method main.
/** For debugging and testing. */
public static void main(String[] args) throws Exception {
  int count = 1024 * 1024;
  int megabytes = 1;
  int factor = 10;
  boolean create = true;
  boolean rwonly = false;
  boolean check = false;
  boolean fast = false;
  boolean merge = false;
  String compressType = "NONE";
  String compressionCodec = "org.apache.hadoop.io.compress.DefaultCodec";
  Path file = null;
  int seed = new Random().nextInt();
  String usage = "Usage: testsequencefile " +
      "[-count N] " +
      "[-seed #] [-check] [-compressType <NONE|RECORD|BLOCK>] " +
      "-codec <compressionCodec> " +
      "[[-rwonly] | {[-megabytes M] [-factor F] [-nocreate] [-fast] [-merge]}] " +
      " file";
  if (args.length == 0) {
    System.err.println(usage);
    System.exit(-1);
  }
  FileSystem fs = null;
  try {
    // parse the command line
    for (int i = 0; i < args.length; ++i) {
      if (args[i] == null) {
        continue;
      } else if (args[i].equals("-count")) {
        count = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-megabytes")) {
        megabytes = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-factor")) {
        factor = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-seed")) {
        seed = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-rwonly")) {
        rwonly = true;
      } else if (args[i].equals("-nocreate")) {
        create = false;
      } else if (args[i].equals("-check")) {
        check = true;
      } else if (args[i].equals("-fast")) {
        fast = true;
      } else if (args[i].equals("-merge")) {
        merge = true;
      } else if (args[i].equals("-compressType")) {
        compressType = args[++i];
      } else if (args[i].equals("-codec")) {
        compressionCodec = args[++i];
      } else {
        // the file is a required positional parameter
        file = new Path(args[i]);
      }
    }
    if (file == null) {
      // guard against a missing file argument, which would otherwise
      // cause a NullPointerException below
      System.err.println(usage);
      System.exit(-1);
    }
    TestSequenceFile test = new TestSequenceFile();
    fs = file.getFileSystem(test.conf);
    LOG.info("count = " + count);
    LOG.info("megabytes = " + megabytes);
    LOG.info("factor = " + factor);
    LOG.info("create = " + create);
    LOG.info("seed = " + seed);
    LOG.info("rwonly = " + rwonly);
    LOG.info("check = " + check);
    LOG.info("fast = " + fast);
    LOG.info("merge = " + merge);
    LOG.info("compressType = " + compressType);
    LOG.info("compressionCodec = " + compressionCodec);
    LOG.info("file = " + file);
    if (rwonly && (!create || merge || fast)) {
      System.err.println(usage);
      System.exit(-1);
    }
    CompressionType compressionType = CompressionType.valueOf(compressType);
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(
        test.conf.getClassByName(compressionCodec), test.conf);
    if (rwonly || (create && !merge)) {
      test.writeTest(fs, count, seed, file, compressionType, codec);
      test.readTest(fs, count, seed, file);
    }
    if (!rwonly) {
      if (merge) {
        test.mergeTest(fs, count, seed, file, compressionType, fast, factor, megabytes);
      } else {
        test.sortTest(fs, count, megabytes, factor, fast, file);
      }
    }
    if (check) {
      test.checkSort(fs, count, seed, file);
    }
  } finally {
    if (fs != null) {
      fs.close();
    }
  }
}
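The -codec flag above is resolved reflectively into a CompressionCodec instance. As a sketch of what such an instance can do on its own (the class name CodecByName and the output path are arbitrary), here is the same reflective resolution followed by direct use of the codec to compress a stream:

import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecByName {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // resolve the codec exactly as the test driver above does
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(
        conf.getClassByName("org.apache.hadoop.io.compress.DefaultCodec"), conf);
    FileSystem fs = FileSystem.getLocal(conf);
    // DefaultCodec reports ".deflate" as its default extension
    Path out = new Path("/tmp/sample" + codec.getDefaultExtension());
    OutputStream stream = codec.createOutputStream(fs.create(out));
    try {
      stream.write("hello, compressed world\n".getBytes("UTF-8"));
    } finally {
      stream.close();
    }
  }
}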
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by Apache.
The class FileOutputFormat, method getOutputCompressorClass.
/**
 * Get the {@link CompressionCodec} for compressing the job outputs.
 * @param job the {@link JobContext} to look in
 * @param defaultValue the {@link CompressionCodec} class to return if not set
 * @return the {@link CompressionCodec} class to be used to compress the
 *         job outputs
 * @throws IllegalArgumentException if the class was specified, but not found
 */
public static Class<? extends CompressionCodec> getOutputCompressorClass(
    JobContext job, Class<? extends CompressionCodec> defaultValue) {
  Class<? extends CompressionCodec> codecClass = defaultValue;
  Configuration conf = job.getConfiguration();
  String name = conf.get(FileOutputFormat.COMPRESS_CODEC);
  if (name != null) {
    try {
      codecClass = conf.getClassByName(name).asSubclass(CompressionCodec.class);
    } catch (ClassNotFoundException e) {
      throw new IllegalArgumentException(
          "Compression codec " + name + " was not found.", e);
    }
  }
  return codecClass;
}
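A short sketch of the setter/getter pair in action, using the new org.apache.hadoop.mapreduce API (the job name and the class name CodecResolution are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CodecResolution {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "codec-resolution");
    // stores GzipCodec's class name under mapreduce.output.fileoutputformat.compress.codec
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    // reads the same key back; DefaultCodec would be returned if nothing had been set
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job, DefaultCodec.class);
    System.out.println(codecClass.getName()); // org.apache.hadoop.io.compress.GzipCodec
  }
}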