Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.
From class TestSequenceFile, method testSequenceFileMetadata.
/** Unit tests for SequenceFile metadata. */
@Test
public void testSequenceFileMetadata() throws Exception {
  LOG.info("Testing SequenceFile with metadata");
  int count = 1024 * 10;
  CompressionCodec codec = new DefaultCodec();
  Path file = new Path(GenericTestUtils.getTempPath("test.seq.metadata"));
  Path sortedFile = new Path(GenericTestUtils.getTempPath("test.sorted.seq.metadata"));
  Path recordCompressedFile = new Path(GenericTestUtils.getTempPath("test.rc.seq.metadata"));
  Path blockCompressedFile = new Path(GenericTestUtils.getTempPath("test.bc.seq.metadata"));
  FileSystem fs = FileSystem.getLocal(conf);
  SequenceFile.Metadata theMetadata = new SequenceFile.Metadata();
  theMetadata.set(new Text("name_1"), new Text("value_1"));
  theMetadata.set(new Text("name_2"), new Text("value_2"));
  theMetadata.set(new Text("name_3"), new Text("value_3"));
  theMetadata.set(new Text("name_4"), new Text("value_4"));
  int seed = new Random().nextInt();
  try {
    // SequenceFile.Writer
    writeMetadataTest(fs, count, seed, file, CompressionType.NONE, null, theMetadata);
    SequenceFile.Metadata aMetadata = readMetadata(fs, file);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata not match: " + 1);
    }
    // SequenceFile.RecordCompressWriter
    writeMetadataTest(fs, count, seed, recordCompressedFile, CompressionType.RECORD, codec, theMetadata);
    aMetadata = readMetadata(fs, recordCompressedFile);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata not match: " + 2);
    }
    // SequenceFile.BlockCompressWriter
    writeMetadataTest(fs, count, seed, blockCompressedFile, CompressionType.BLOCK, codec, theMetadata);
    aMetadata = readMetadata(fs, blockCompressedFile);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata not match: " + 3);
    }
    // SequenceFile.Sorter
    sortMetadataTest(fs, file, sortedFile, theMetadata);
    // Read the metadata back from the sorted output so the Sorter's metadata handling is checked.
    aMetadata = readMetadata(fs, sortedFile);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata not match: " + 4);
    }
  } finally {
    fs.close();
  }
  LOG.info("Successfully tested SequenceFile with metadata");
}
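The helpers writeMetadataTest, readMetadata and sortMetadataTest are not shown in this snippet. As a rough sketch of what a readMetadata-style helper could look like (the name and signature here are assumptions, not the test's actual code), SequenceFile.Reader exposes the metadata block directly:

private static SequenceFile.Metadata readMetadata(FileSystem fs, Path file) throws IOException {
  SequenceFile.Reader reader =
      new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(file));
  try {
    // The metadata written at creation time travels with the file header.
    return reader.getMetadata();
  } finally {
    reader.close();
  }
}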
Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.
From class TestSequenceFileAppend, method testAppendNoneCompression.
@Test(timeout = 30000)
public void testAppendNoneCompression() throws Exception {
  Path file = new Path(ROOT_PATH, "testseqappendnonecompr.seq");
  fs.delete(file, true);
  Option compressOption = Writer.compression(CompressionType.NONE);
  Writer writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(file),
      SequenceFile.Writer.keyClass(Long.class),
      SequenceFile.Writer.valueClass(String.class),
      compressOption);
  writer.append(1L, "one");
  writer.append(2L, "two");
  writer.close();
  verify2Values(file);
  writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(file),
      SequenceFile.Writer.keyClass(Long.class),
      SequenceFile.Writer.valueClass(String.class),
      SequenceFile.Writer.appendIfExists(true),
      compressOption);
  writer.append(3L, "three");
  writer.append(4L, "four");
  writer.close();
  verifyAll4Values(file);
  // Verify failure if the compression details are different or not provided
  try {
    writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(file),
        SequenceFile.Writer.keyClass(Long.class),
        SequenceFile.Writer.valueClass(String.class),
        SequenceFile.Writer.appendIfExists(true));
    writer.close();
    fail("Expected IllegalArgumentException for compression options");
  } catch (IllegalArgumentException iae) {
    // Expected exception. Ignore it
  }
  // Verify failure if the compression details are different
  try {
    Option wrongCompressOption = Writer.compression(CompressionType.RECORD, new GzipCodec());
    writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(file),
        SequenceFile.Writer.keyClass(Long.class),
        SequenceFile.Writer.valueClass(String.class),
        SequenceFile.Writer.appendIfExists(true),
        wrongCompressOption);
    writer.close();
    fail("Expected IllegalArgumentException for compression options");
  } catch (IllegalArgumentException iae) {
    // Expected exception. Ignore it
  }
  // Codec should be ignored
  Option noneWithCodec = Writer.compression(CompressionType.NONE, new DefaultCodec());
  writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(file),
      SequenceFile.Writer.keyClass(Long.class),
      SequenceFile.Writer.valueClass(String.class),
      SequenceFile.Writer.appendIfExists(true),
      noneWithCodec);
  writer.close();
  fs.deleteOnExit(file);
}
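verify2Values and verifyAll4Values are helper methods of the test class and are not part of this snippet. A minimal sketch of what a verify2Values-style check could look like, assuming JavaSerialization is enabled in conf so that plain Long/String keys and values round-trip:

private void verify2Values(Path file) throws IOException {
  SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));
  try {
    // With a non-Writable serialization, next(...) returns the deserialized key
    // and getCurrentValue(...) the matching value.
    assertEquals(1L, reader.next((Object) null));
    assertEquals("one", reader.getCurrentValue((Object) null));
    assertEquals(2L, reader.next((Object) null));
    assertEquals("two", reader.getCurrentValue((Object) null));
    assertNull(reader.next((Object) null));
  } finally {
    reader.close();
  }
}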
Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.
From class TestHSync, method testSequenceFileSync.
/** Test hsync via SequenceFiles */
@Test
public void testSequenceFileSync() throws Exception {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  final FileSystem fs = cluster.getFileSystem();
  final Path p = new Path("/testSequenceFileSync/foo");
  final int len = 1 << 16;
  FSDataOutputStream out = fs.create(p, FsPermission.getDefault(),
      EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE, CreateFlag.SYNC_BLOCK),
      4096, (short) 1, len, null);
  Writer w = SequenceFile.createWriter(new Configuration(),
      Writer.stream(out),
      Writer.keyClass(RandomDatum.class),
      Writer.valueClass(RandomDatum.class),
      Writer.compression(CompressionType.NONE, new DefaultCodec()));
  w.hflush();
  checkSyncMetric(cluster, 0);
  w.hsync();
  checkSyncMetric(cluster, 1);
  int seed = new Random().nextInt();
  RandomDatum.Generator generator = new RandomDatum.Generator(seed);
  generator.next();
  w.append(generator.getKey(), generator.getValue());
  w.hsync();
  checkSyncMetric(cluster, 2);
  w.close();
  checkSyncMetric(cluster, 2);
  out.close();
  checkSyncMetric(cluster, 3);
  cluster.shutdown();
}
Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.
From class TestIFile, method testIFileWriterWithCodec.
/**
 * Create an IFile.Writer using GzipCodec since this code does not
 * have a compressor when run via the tests (ie no native libraries).
 */
@Test
public void testIFileWriterWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  // GzipCodec extends DefaultCodec, so it can be assigned to a DefaultCodec reference.
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, rfs.create(path), Text.class, Text.class, codec, null);
  writer.close();
}
Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.
From class TestSequenceFileMergeProgress, method runTest.
public void runTest(CompressionType compressionType) throws IOException {
  JobConf job = new JobConf();
  FileSystem fs = FileSystem.getLocal(job);
  Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
  Path file = new Path(dir, "test.seq");
  Path tempDir = new Path(dir, "tmp");
  fs.delete(dir, true);
  FileInputFormat.setInputPaths(job, dir);
  fs.mkdirs(tempDir);
  LongWritable tkey = new LongWritable();
  Text tval = new Text();
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file,
      LongWritable.class, Text.class, compressionType, new DefaultCodec());
  try {
    for (int i = 0; i < RECORDS; ++i) {
      tkey.set(1234);
      tval.set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue");
      writer.append(tkey, tval);
    }
  } finally {
    writer.close();
  }
  long fileLength = fs.getFileStatus(file).getLen();
  LOG.info("With compression = " + compressionType + ": "
      + "compressed length = " + fileLength);
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs,
      job.getOutputKeyComparator(), job.getMapOutputKeyClass(),
      job.getMapOutputValueClass(), job);
  Path[] paths = new Path[] { file };
  RawKeyValueIterator rIter = sorter.merge(paths, tempDir, false);
  int count = 0;
  while (rIter.next()) {
    count++;
  }
  assertEquals(RECORDS, count);
  assertEquals(1.0f, rIter.getProgress().get(), 0.0000);
}
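The createWriter overload used above (fs, conf, path, key class, value class, type, codec) is one of the older positional factories. The same writer can also be built with the option-based API; the fragment below is only an illustrative equivalent under the same conf, path and codec, not part of the original test:

SequenceFile.Writer optsWriter = SequenceFile.createWriter(job,
    SequenceFile.Writer.file(file),
    SequenceFile.Writer.keyClass(LongWritable.class),
    SequenceFile.Writer.valueClass(Text.class),
    // DefaultCodec again, paired with whatever CompressionType the test is run with.
    SequenceFile.Writer.compression(compressionType, new DefaultCodec()));
optsWriter.close();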