Example usage of org.apache.hadoop.io.compress.GzipCodec from the Apache Hadoop project.
Taken from the class TestSequenceFileAppend, method testAppend.
/**
 * Verifies SequenceFile append semantics: appended records are readable
 * together with the original ones, the original file's metadata is kept
 * (not overwritten by the appender's metadata option), and appending with
 * mismatched compression settings is rejected.
 */
@Test(timeout = 30000)
public void testAppend() throws Exception {
  Path file = new Path(ROOT_PATH, "testseqappend.seq");
  fs.delete(file, true);

  Text key1 = new Text("Key1");
  Text value1 = new Text("Value1");
  Text value2 = new Text("Updated");

  SequenceFile.Metadata metadata = new SequenceFile.Metadata();
  metadata.set(key1, value1);
  Writer.Option metadataOption = Writer.metadata(metadata);

  // Write the initial two records with the original metadata.
  Writer writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(file),
      SequenceFile.Writer.keyClass(Long.class),
      SequenceFile.Writer.valueClass(String.class),
      metadataOption);
  writer.append(1L, "one");
  writer.append(2L, "two");
  writer.close();
  verify2Values(file);

  // Reopen in append mode with an updated metadata value.
  metadata.set(key1, value2);
  writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(file),
      SequenceFile.Writer.keyClass(Long.class),
      SequenceFile.Writer.valueClass(String.class),
      SequenceFile.Writer.appendIfExists(true),
      metadataOption);

  // Verify the Meta data is not changed — the existing file's metadata wins.
  assertEquals(value1, writer.metadata.get(key1));
  writer.append(3L, "three");
  writer.append(4L, "four");
  writer.close();
  verifyAll4Values(file);

  // Verify the Meta data readable after append
  Reader reader = new Reader(conf, Reader.file(file));
  assertEquals(value1, reader.getMetadata().get(key1));
  reader.close();

  // Verify failure if the compression details are different
  try {
    Option wrongCompressOption =
        Writer.compression(CompressionType.RECORD, new GzipCodec());
    writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(file),
        SequenceFile.Writer.keyClass(Long.class),
        SequenceFile.Writer.valueClass(String.class),
        SequenceFile.Writer.appendIfExists(true),
        wrongCompressOption);
    writer.close();
    fail("Expected IllegalArgumentException for compression options");
  } catch (IllegalArgumentException iae) {
    // Expected exception. Ignore it
  }

  try {
    Option wrongCompressOption =
        Writer.compression(CompressionType.BLOCK, new DefaultCodec());
    writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(file),
        SequenceFile.Writer.keyClass(Long.class),
        SequenceFile.Writer.valueClass(String.class),
        SequenceFile.Writer.appendIfExists(true),
        wrongCompressOption);
    writer.close();
    fail("Expected IllegalArgumentException for compression options");
  } catch (IllegalArgumentException iae) {
    // Expected exception. Ignore it
  }

  fs.deleteOnExit(file);
}
Example usage of org.apache.hadoop.io.compress.GzipCodec from the Apache Hadoop project.
Taken from the class TestSequenceFileAppend, method testAppendNoneCompression.
/**
 * Exercises append-mode behavior for an uncompressed (CompressionType.NONE)
 * SequenceFile: a matching NONE option allows the append, omitting or changing
 * the compression option is rejected, and a codec supplied alongside NONE is
 * simply ignored.
 */
@Test(timeout = 30000)
public void testAppendNoneCompression() throws Exception {
  Path file = new Path(ROOT_PATH, "testseqappendnonecompr.seq");
  fs.delete(file, true);

  Option noneOption = Writer.compression(CompressionType.NONE);

  // Initial write of two uncompressed records.
  Writer w = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(file),
      SequenceFile.Writer.keyClass(Long.class),
      SequenceFile.Writer.valueClass(String.class),
      noneOption);
  w.append(1L, "one");
  w.append(2L, "two");
  w.close();
  verify2Values(file);

  // Appending with the same (NONE) compression option must succeed.
  w = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(file),
      SequenceFile.Writer.keyClass(Long.class),
      SequenceFile.Writer.valueClass(String.class),
      SequenceFile.Writer.appendIfExists(true),
      noneOption);
  w.append(3L, "three");
  w.append(4L, "four");
  w.close();
  verifyAll4Values(file);

  // Omitting the compression option entirely must be rejected.
  try {
    w = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(file),
        SequenceFile.Writer.keyClass(Long.class),
        SequenceFile.Writer.valueClass(String.class),
        SequenceFile.Writer.appendIfExists(true));
    w.close();
    fail("Expected IllegalArgumentException for compression options");
  } catch (IllegalArgumentException ignored) {
    // Rejection is the expected outcome.
  }

  // A different compression type/codec must also be rejected.
  try {
    Option mismatched =
        Writer.compression(CompressionType.RECORD, new GzipCodec());
    w = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(file),
        SequenceFile.Writer.keyClass(Long.class),
        SequenceFile.Writer.valueClass(String.class),
        SequenceFile.Writer.appendIfExists(true),
        mismatched);
    w.close();
    fail("Expected IllegalArgumentException for compression options");
  } catch (IllegalArgumentException ignored) {
    // Rejection is the expected outcome.
  }

  // With CompressionType.NONE the supplied codec is ignored, so this succeeds.
  Option noneWithCodec = Writer.compression(CompressionType.NONE, new DefaultCodec());
  w = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(file),
      SequenceFile.Writer.keyClass(Long.class),
      SequenceFile.Writer.valueClass(String.class),
      SequenceFile.Writer.appendIfExists(true),
      noneWithCodec);
  w.close();

  fs.deleteOnExit(file);
}
Example usage of org.apache.hadoop.io.compress.GzipCodec from the Apache Hadoop project.
Taken from the class TestSequenceFileAppend, method testAppendSort.
/**
 * Verifies that a block-compressed SequenceFile which has been appended to
 * can still be sorted: writes keys out of order across two writer sessions,
 * sorts the result, and checks all four records come back in key order.
 * Requires the native profile (native compression libraries).
 */
@Test(timeout = 30000)
public void testAppendSort() throws Exception {
  GenericTestUtils.assumeInNativeProfile();

  Path input = new Path(ROOT_PATH, "testseqappendSort.seq");
  fs.delete(input, true);
  Path sorted = new Path(ROOT_PATH, "testseqappendSort.seq.sort");
  fs.delete(sorted, true);

  SequenceFile.Sorter sorter = new SequenceFile.Sorter(
      fs, new JavaSerializationComparator<Long>(), Long.class, String.class, conf);

  Option blockGzip = Writer.compression(CompressionType.BLOCK, new GzipCodec());

  // First session: write two records deliberately out of key order.
  Writer out = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(input),
      SequenceFile.Writer.keyClass(Long.class),
      SequenceFile.Writer.valueClass(String.class),
      blockGzip);
  out.append(2L, "two");
  out.append(1L, "one");
  out.close();

  // Second session: append two more, again out of order.
  out = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(input),
      SequenceFile.Writer.keyClass(Long.class),
      SequenceFile.Writer.valueClass(String.class),
      SequenceFile.Writer.appendIfExists(true),
      blockGzip);
  out.append(4L, "four");
  out.append(3L, "three");
  out.close();

  // Sorting the appended file must yield all four records in order.
  sorter.sort(input, sorted);
  verifyAll4Values(sorted);

  fs.deleteOnExit(input);
  fs.deleteOnExit(sorted);
}
Example usage of org.apache.hadoop.io.compress.GzipCodec from the Apache Hadoop project.
Taken from the class TestIFile, method testIFileWriterWithCodec.
/**
 * Create an IFile.Writer using GzipCodec since this code does not
 * have a compressor when run via the tests (ie no native libraries).
 */
@Test
public void testIFileWriterWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");

  // Declare as GzipCodec (not DefaultCodec) to match what is actually
  // constructed and what the test is exercising.
  GzipCodec codec = new GzipCodec();
  codec.setConf(conf);

  IFile.Writer<Text, Text> writer = new IFile.Writer<Text, Text>(
      conf, rfs.create(path), Text.class, Text.class, codec, null);
  // Closing the writer flushes and closes the underlying stream.
  writer.close();
}
Example usage of org.apache.hadoop.io.compress.GzipCodec from the Apache Hadoop project.
Taken from the class TestCombineTextInputFormat, method testGzip.
/**
* Test using the gzip codec for reading
*/
/**
 * Reads two gzip-compressed input files through CombineTextInputFormat and
 * checks that they are combined into a single split containing every line of
 * both files, in file order (whichever file comes first in the split).
 */
@Test(timeout = 10000)
public void testGzip() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzipCodec = new GzipCodec();
  ReflectionUtils.setConf(gzipCodec, job);
  localFs.delete(workDir, true);

  // Two small gzip files; together they hold 8 lines.
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzipCodec,
      "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzipCodec,
      "this is a test\nof gzip\n");
  FileInputFormat.setInputPaths(job, workDir);

  CombineTextInputFormat inputFormat = new CombineTextInputFormat();
  InputSplit[] splits = inputFormat.getSplits(job, 100);
  assertEquals("compressed splits == 1", 1, splits.length);

  List<Text> lines = readSplit(inputFormat, splits[0], job);
  assertEquals("splits[0] length", 8, lines.size());

  final String[] fileOneLines =
      { "the quick", "brown", "fox jumped", "over", " the lazy", " dog" };
  final String[] fileTwoLines = { "this is a test", "of gzip" };

  // File order within the combined split is unspecified; dispatch on the
  // first line to decide which file came first.
  String firstLine = lines.get(0).toString();
  if (firstLine.equals(fileOneLines[0])) {
    testResults(lines, fileOneLines, fileTwoLines);
  } else if (firstLine.equals(fileTwoLines[0])) {
    testResults(lines, fileTwoLines, fileOneLines);
  } else {
    fail("unexpected first token!");
  }
}
Aggregations