Example 16 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.

From the class TestSequenceFile, the method testSequenceFileMetadata.

/** Unit tests for SequenceFile metadata. */
@Test
public void testSequenceFileMetadata() throws Exception {
    LOG.info("Testing SequenceFile with metadata");
    int count = 1024 * 10;
    CompressionCodec codec = new DefaultCodec();
    Path file = new Path(GenericTestUtils.getTempPath("test.seq.metadata"));
    Path sortedFile = new Path(GenericTestUtils.getTempPath("test.sorted.seq.metadata"));
    Path recordCompressedFile = new Path(GenericTestUtils.getTempPath("test.rc.seq.metadata"));
    Path blockCompressedFile = new Path(GenericTestUtils.getTempPath("test.bc.seq.metadata"));
    FileSystem fs = FileSystem.getLocal(conf);
    SequenceFile.Metadata theMetadata = new SequenceFile.Metadata();
    theMetadata.set(new Text("name_1"), new Text("value_1"));
    theMetadata.set(new Text("name_2"), new Text("value_2"));
    theMetadata.set(new Text("name_3"), new Text("value_3"));
    theMetadata.set(new Text("name_4"), new Text("value_4"));
    int seed = new Random().nextInt();
    try {
        // SequenceFile.Writer
        writeMetadataTest(fs, count, seed, file, CompressionType.NONE, null, theMetadata);
        SequenceFile.Metadata aMetadata = readMetadata(fs, file);
        if (!theMetadata.equals(aMetadata)) {
            LOG.info("The original metadata:\n" + theMetadata.toString());
            LOG.info("The retrieved metadata:\n" + aMetadata.toString());
            throw new RuntimeException("metadata does not match: " + 1);
        }
        // SequenceFile.RecordCompressWriter
        writeMetadataTest(fs, count, seed, recordCompressedFile, CompressionType.RECORD, codec, theMetadata);
        aMetadata = readMetadata(fs, recordCompressedFile);
        if (!theMetadata.equals(aMetadata)) {
            LOG.info("The original metadata:\n" + theMetadata.toString());
            LOG.info("The retrieved metadata:\n" + aMetadata.toString());
            throw new RuntimeException("metadata does not match: " + 2);
        }
        // SequenceFile.BlockCompressWriter
        writeMetadataTest(fs, count, seed, blockCompressedFile, CompressionType.BLOCK, codec, theMetadata);
        aMetadata = readMetadata(fs, blockCompressedFile);
        if (!theMetadata.equals(aMetadata)) {
            LOG.info("The original metadata:\n" + theMetadata.toString());
            LOG.info("The retrieved metadata:\n" + aMetadata.toString());
            throw new RuntimeException("metadata does not match: " + 3);
        }
        // SequenceFile.Sorter
        sortMetadataTest(fs, file, sortedFile, theMetadata);
        // read back from the sorted file, not the already-verified record-compressed one
        aMetadata = readMetadata(fs, sortedFile);
        if (!theMetadata.equals(aMetadata)) {
            LOG.info("The original metadata:\n" + theMetadata.toString());
            LOG.info("The retrieved metadata:\n" + aMetadata.toString());
            throw new RuntimeException("metadata does not match: " + 4);
        }
    } finally {
        fs.close();
    }
    LOG.info("Successfully tested SequenceFile with metadata");
}
Also used : Metadata(org.apache.hadoop.io.SequenceFile.Metadata) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) Test(org.junit.Test)
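
For readers who want the metadata round-trip without the test harness, here is a minimal sketch built on the same public SequenceFile API. The class name, path, and metadata entries are illustrative, not taken from the test:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Metadata;
import org.apache.hadoop.io.Text;

public class MetadataRoundTrip {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Illustrative path; any writable location works.
        Path file = new Path("/tmp/metadata-demo.seq");

        Metadata metadata = new Metadata();
        metadata.set(new Text("name_1"), new Text("value_1"));

        // Metadata is passed at creation time and written into the file header.
        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(file),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(Text.class),
                SequenceFile.Writer.metadata(metadata));
        writer.append(new Text("k"), new Text("v"));
        writer.close();

        // The reader exposes the header metadata without scanning any records.
        SequenceFile.Reader reader =
                new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));
        Metadata readBack = reader.getMetadata();
        reader.close();

        System.out.println("metadata round-trip ok: " + metadata.equals(readBack));
    }
}

The metadata option combines freely with the compression options exercised by the test above; the header metadata survives NONE, RECORD, and BLOCK compression alike.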

Example 17 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.

From the class TestSequenceFileAppend, the method testAppendNoneCompression.

@Test(timeout = 30000)
public void testAppendNoneCompression() throws Exception {
    Path file = new Path(ROOT_PATH, "testseqappendnonecompr.seq");
    fs.delete(file, true);
    Option compressOption = Writer.compression(CompressionType.NONE);
    Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(file),
            SequenceFile.Writer.keyClass(Long.class),
            SequenceFile.Writer.valueClass(String.class),
            compressOption);
    writer.append(1L, "one");
    writer.append(2L, "two");
    writer.close();
    verify2Values(file);
    writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(file),
            SequenceFile.Writer.keyClass(Long.class),
            SequenceFile.Writer.valueClass(String.class),
            SequenceFile.Writer.appendIfExists(true),
            compressOption);
    writer.append(3L, "three");
    writer.append(4L, "four");
    writer.close();
    verifyAll4Values(file);
    // Verify failure if the compression details are different or not provided
    try {
        writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(file),
                SequenceFile.Writer.keyClass(Long.class),
                SequenceFile.Writer.valueClass(String.class),
                SequenceFile.Writer.appendIfExists(true));
        writer.close();
        fail("Expected IllegalArgumentException for compression options");
    } catch (IllegalArgumentException iae) {
    // Expected exception. Ignore it
    }
    // Verify failure if the compression details are different
    try {
        Option wrongCompressOption = Writer.compression(CompressionType.RECORD, new GzipCodec());
        writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(file),
                SequenceFile.Writer.keyClass(Long.class),
                SequenceFile.Writer.valueClass(String.class),
                SequenceFile.Writer.appendIfExists(true),
                wrongCompressOption);
        writer.close();
        fail("Expected IllegalArgumentException for compression options");
    } catch (IllegalArgumentException iae) {
    // Expected exception. Ignore it
    }
    // When the compression type is NONE, the codec should be ignored
    Option noneWithCodec = Writer.compression(CompressionType.NONE, new DefaultCodec());
    writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(file),
            SequenceFile.Writer.keyClass(Long.class),
            SequenceFile.Writer.valueClass(String.class),
            SequenceFile.Writer.appendIfExists(true),
            noneWithCodec);
    writer.close();
    fs.deleteOnExit(file);
}
Also used : Path(org.apache.hadoop.fs.Path) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) Option(org.apache.hadoop.io.SequenceFile.Writer.Option) Writer(org.apache.hadoop.io.SequenceFile.Writer) Test(org.junit.Test)
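
The contract the test exercises can be shown more compactly. A minimal sketch, with an illustrative class name, path, and records: a file created with RECORD compression can only be reopened via appendIfExists(true) when the original compression settings are restated; omitting them, or passing a different codec, makes createWriter throw IllegalArgumentException, as the try/catch blocks above verify.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class AppendContractDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/append-demo.seq"); // illustrative path
        Writer.Option compress =
                Writer.compression(CompressionType.RECORD, new DefaultCodec());

        // Create the file with RECORD compression and one record.
        Writer w = SequenceFile.createWriter(conf, Writer.file(file),
                Writer.keyClass(LongWritable.class), Writer.valueClass(Text.class),
                compress);
        w.append(new LongWritable(1), new Text("one"));
        w.close();

        // Reopening for append succeeds only because the same compression
        // option is restated; dropping it or changing the codec would throw.
        w = SequenceFile.createWriter(conf, Writer.file(file),
                Writer.keyClass(LongWritable.class), Writer.valueClass(Text.class),
                Writer.appendIfExists(true), compress);
        w.append(new LongWritable(2), new Text("two"));
        w.close();
    }
}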

Example 18 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.

From the class TestHSync, the method testSequenceFileSync.

/** Test hsync via SequenceFiles */
@Test
public void testSequenceFileSync() throws Exception {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    final FileSystem fs = cluster.getFileSystem();
    final Path p = new Path("/testSequenceFileSync/foo");
    final int len = 1 << 16;
    FSDataOutputStream out = fs.create(p, FsPermission.getDefault(),
            EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE, CreateFlag.SYNC_BLOCK),
            4096, (short) 1, len, null);
    Writer w = SequenceFile.createWriter(new Configuration(),
            Writer.stream(out),
            Writer.keyClass(RandomDatum.class),
            Writer.valueClass(RandomDatum.class),
            Writer.compression(CompressionType.NONE, new DefaultCodec()));
    w.hflush();
    checkSyncMetric(cluster, 0);
    w.hsync();
    checkSyncMetric(cluster, 1);
    int seed = new Random().nextInt();
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    generator.next();
    w.append(generator.getKey(), generator.getValue());
    w.hsync();
    checkSyncMetric(cluster, 2);
    w.close();
    checkSyncMetric(cluster, 2);
    out.close();
    checkSyncMetric(cluster, 3);
    cluster.shutdown();
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) Configuration(org.apache.hadoop.conf.Configuration) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) Random(java.util.Random) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Writer(org.apache.hadoop.io.SequenceFile.Writer) RandomDatum(org.apache.hadoop.io.RandomDatum) Test(org.junit.Test)
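
Outside a MiniDFSCluster, the hflush/hsync distinction that the fsync metric counts looks like this. A minimal sketch, assuming the default FileSystem supports durable sync (HDFS does); the class name and path are illustrative:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SyncDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FSDataOutputStream out = fs.create(new Path("/tmp/sync-demo"));

        out.write("payload".getBytes(StandardCharsets.UTF_8));
        out.hflush(); // push data to the datanodes: visible to new readers, not fsynced
        out.hsync();  // additionally fsync the block files on disk (what the metric counts)
        out.close();
    }
}

Note what the metric counts in the test show: w.close() adds no sync (the count stays at 2), while out.close() does (it rises to 3), because the stream was created with CreateFlag.SYNC_BLOCK, which syncs the block on close.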

Example 19 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.

From the class TestIFile, the method testIFileWriterWithCodec.

/**
 * Create an IFile.Writer using GzipCodec since this code does not
 * have a compressor when run via the tests (i.e. no native libraries).
 */
@Test
public void testIFileWriterWithCodec() throws Exception {
    Configuration conf = new Configuration();
    FileSystem localFs = FileSystem.getLocal(conf);
    FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
    Path path = new Path(new Path("build/test.ifile"), "data");
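    // GzipCodec extends DefaultCodec, so it can be assigned to a DefaultCodec variable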
    DefaultCodec codec = new GzipCodec();
    codec.setConf(conf);
    IFile.Writer<Text, Text> writer = new IFile.Writer<Text, Text>(
            conf, rfs.create(path), Text.class, Text.class, codec, null);
    writer.close();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) FileSystem(org.apache.hadoop.fs.FileSystem) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) Text(org.apache.hadoop.io.Text) Test(org.junit.Test)
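
The setConf call above is the detail that most often trips callers up: GzipCodec, like DefaultCodec, is Configurable and cannot hand out streams until it has a Configuration. A minimal sketch of the usual idiom, using ReflectionUtils to wire in the configuration; the class name and output sink are illustrative:

import java.io.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecSetupDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Equivalent to "new GzipCodec()" followed by codec.setConf(conf),
        // as in the test above.
        GzipCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);

        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(sink);
        out.write("hello".getBytes("UTF-8"));
        out.finish(); // flush the gzip trailer without closing the sink
        System.out.println("compressed size: " + sink.size());
    }
}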

Example 20 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.

From the class TestSequenceFileMergeProgress, the method runTest.

public void runTest(CompressionType compressionType) throws IOException {
    JobConf job = new JobConf();
    FileSystem fs = FileSystem.getLocal(job);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");
    Path tempDir = new Path(dir, "tmp");
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    fs.mkdirs(tempDir);
    LongWritable tkey = new LongWritable();
    Text tval = new Text();
    SequenceFile.Writer writer = SequenceFile.createWriter(
            fs, job, file, LongWritable.class, Text.class,
            compressionType, new DefaultCodec());
    try {
        for (int i = 0; i < RECORDS; ++i) {
            tkey.set(1234);
            tval.set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue");
            writer.append(tkey, tval);
        }
    } finally {
        writer.close();
    }
    long fileLength = fs.getFileStatus(file).getLen();
    LOG.info("With compression = " + compressionType + ": " + "compressed length = " + fileLength);
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs,
            job.getOutputKeyComparator(), job.getMapOutputKeyClass(),
            job.getMapOutputValueClass(), job);
    Path[] paths = new Path[] { file };
    RawKeyValueIterator rIter = sorter.merge(paths, tempDir, false);
    int count = 0;
    while (rIter.next()) {
        count++;
    }
    assertEquals(RECORDS, count);
    assertEquals(1.0f, rIter.getProgress().get(), 0.0000);
}
Also used : DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) RawKeyValueIterator(org.apache.hadoop.io.SequenceFile.Sorter.RawKeyValueIterator)
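
As a standalone illustration of the API being measured: a minimal sketch, with an illustrative class name and paths, of creating a Sorter and consuming the RawKeyValueIterator returned by merge. getProgress() is what the assertion above checks reaches 1.0.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class MergeProgressDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);

        SequenceFile.Sorter sorter =
                new SequenceFile.Sorter(fs, LongWritable.class, Text.class, conf);

        // Illustrative inputs: pre-existing SequenceFiles with matching key/value types.
        Path[] inputs = { new Path("/tmp/part-0.seq"), new Path("/tmp/part-1.seq") };
        SequenceFile.Sorter.RawKeyValueIterator it =
                sorter.merge(inputs, new Path("/tmp/merge-tmp"), false);

        int records = 0;
        while (it.next()) {
            records++; // raw bytes are available via it.getKey() / it.getValue()
        }
        // Progress runs from 0.0 to 1.0 as the merged stream is consumed.
        System.out.println(records + " records, progress = " + it.getProgress().get());
    }
}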

Aggregations

DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 21 uses
Test (org.junit.Test): 15 uses
Path (org.apache.hadoop.fs.Path): 10 uses
GzipCodec (org.apache.hadoop.io.compress.GzipCodec): 7 uses
Configuration (org.apache.hadoop.conf.Configuration): 6 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 6 uses
Text (org.apache.hadoop.io.Text): 6 uses
BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable): 5 uses
BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable): 5 uses
Writer (org.apache.hadoop.io.SequenceFile.Writer): 4 uses
Random (java.util.Random): 3 uses
LongWritable (org.apache.hadoop.io.LongWritable): 3 uses
Option (org.apache.hadoop.io.SequenceFile.Writer.Option): 3 uses
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 3 uses
RecordReader (org.apache.hadoop.mapred.RecordReader): 3 uses
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 2 uses
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 2 uses
LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem): 2 uses
KeyValueCodec (org.apache.hadoop.hbase.codec.KeyValueCodec): 2 uses
RCFile (org.apache.hadoop.hive.ql.io.RCFile): 2 uses