
Example 1 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.

From the class TestSequenceFile, method testZlibSequenceFile.

/** Tests SequenceFile with the zlib-based DefaultCodec. */
@Test
public void testZlibSequenceFile() throws Exception {
    LOG.info("Testing SequenceFile with DefaultCodec");
    compressedSeqFileTest(new DefaultCodec());
    LOG.info("Successfully tested SequenceFile with DefaultCodec");
}
Also used: DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec), Test (org.junit.Test)
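DefaultCodec is Hadoop's zlib/DEFLATE codec; its output carries the ".deflate" default extension. As a minimal standalone sketch of what the codec itself does (the class name and sample data are illustrative, not part of the test), it can round-trip a byte stream directly:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

public class DefaultCodecRoundTrip {
    public static void main(String[] args) throws Exception {
        DefaultCodec codec = new DefaultCodec();
        // DefaultCodec is Configurable; it needs a Configuration before use.
        codec.setConf(new Configuration());

        byte[] original = "hello, DefaultCodec".getBytes(StandardCharsets.UTF_8);

        // Compress into an in-memory buffer.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(buffer);
        out.write(original);
        out.close();

        // Decompress and print the round-tripped text.
        CompressionInputStream in =
                codec.createInputStream(new ByteArrayInputStream(buffer.toByteArray()));
        byte[] restored = new byte[original.length];
        int n = in.read(restored);
        in.close();
        System.out.println(new String(restored, 0, n, StandardCharsets.UTF_8));
    }
}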

Example 2 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.

From the class TestSequenceFileAppend, method testAppendBlockCompression.

@Test(timeout = 30000)
public void testAppendBlockCompression() throws Exception {
    GenericTestUtils.assumeInNativeProfile();
    Path file = new Path(ROOT_PATH, "testseqappendblockcompr.seq");
    fs.delete(file, true);
    Option compressOption = Writer.compression(CompressionType.BLOCK, new GzipCodec());
    Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(file), SequenceFile.Writer.keyClass(Long.class), SequenceFile.Writer.valueClass(String.class), compressOption);
    writer.append(1L, "one");
    writer.append(2L, "two");
    writer.close();
    verify2Values(file);
    writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(file), SequenceFile.Writer.keyClass(Long.class), SequenceFile.Writer.valueClass(String.class), SequenceFile.Writer.appendIfExists(true), compressOption);
    writer.append(3L, "three");
    writer.append(4L, "four");
    writer.close();
    verifyAll4Values(file);
    // Verify failure if the compression details are not provided
    try {
        writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(file), SequenceFile.Writer.keyClass(Long.class), SequenceFile.Writer.valueClass(String.class), SequenceFile.Writer.appendIfExists(true));
        writer.close();
        fail("Expected IllegalArgumentException for compression options");
    } catch (IllegalArgumentException iae) {
    // Expected exception. Ignore it
    }
    // Verify failure if the compression type is different
    try {
        Option wrongCompressOption = Writer.compression(CompressionType.RECORD, new GzipCodec());
        writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(file), SequenceFile.Writer.keyClass(Long.class), SequenceFile.Writer.valueClass(String.class), SequenceFile.Writer.appendIfExists(true), wrongCompressOption);
        writer.close();
        fail("Expected IllegalArgumentException for compression options");
    } catch (IllegalArgumentException iae) {
    // Expected exception. Ignore it
    }
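    // Verify failure if the codec is different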
    try {
        Option wrongCompressOption = Writer.compression(CompressionType.BLOCK, new DefaultCodec());
        writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(file), SequenceFile.Writer.keyClass(Long.class), SequenceFile.Writer.valueClass(String.class), SequenceFile.Writer.appendIfExists(true), wrongCompressOption);
        writer.close();
        fail("Expected IllegalArgumentException for compression options");
    } catch (IllegalArgumentException iae) {
    // Expected exception. Ignore it
    }
    fs.deleteOnExit(file);
}
Also used: Path (org.apache.hadoop.fs.Path), GzipCodec (org.apache.hadoop.io.compress.GzipCodec), DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec), Option (org.apache.hadoop.io.SequenceFile.Writer.Option), Writer (org.apache.hadoop.io.SequenceFile.Writer), Test (org.junit.Test)
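The three try/catch blocks encode the append contract: reopening a compressed SequenceFile with appendIfExists(true) requires the same compression type and codec the file was created with; otherwise createWriter throws IllegalArgumentException. Below is a minimal sketch of creating such a file with DefaultCodec (the path and class name are illustrative; Writable key/value types are used so that, unlike the Long/String pairs above, which rely on serializer configuration done elsewhere in the test class, no extra setup is needed):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class BlockCompressedSeqFileSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/example.seq"); // illustrative path

        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(file),
                SequenceFile.Writer.keyClass(LongWritable.class),
                SequenceFile.Writer.valueClass(Text.class),
                SequenceFile.Writer.compression(CompressionType.BLOCK, new DefaultCodec()))) {
            writer.append(new LongWritable(1), new Text("one"));
            writer.append(new LongWritable(2), new Text("two"));
        }
        // Any later createWriter(..., appendIfExists(true), ...) call on this file
        // must repeat CompressionType.BLOCK and DefaultCodec, which is exactly
        // what the three negative cases in the test above assert.
    }
}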

Example 3 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.

From the class TestTextOutputFormat, method testCompress.

/**
 * Tests that TextOutputFormat writes a compressed file that can be read
 * back with DefaultCodec.
 * @throws IOException
 */
@Test
public void testCompress() throws IOException {
    JobConf job = new JobConf();
    job.set(JobContext.TASK_ATTEMPT_ID, attempt);
    job.set(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS, "true");
    FileOutputFormat.setOutputPath(job, workDir.getParent().getParent());
    FileOutputFormat.setWorkOutputPath(job, workDir);
    FileSystem fs = workDir.getFileSystem(job);
    if (!fs.mkdirs(workDir)) {
        fail("Failed to create output directory");
    }
    String file = "test_compress.txt";
    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    TextOutputFormat<Object, Object> theOutputFormat = new TextOutputFormat<Object, Object>();
    RecordWriter<Object, Object> theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    Text key1 = new Text("key1");
    Text key2 = new Text("key2");
    Text val1 = new Text("val1");
    Text val2 = new Text("val2");
    NullWritable nullWritable = NullWritable.get();
    try {
        theRecordWriter.write(key1, val1);
        theRecordWriter.write(null, nullWritable);
        theRecordWriter.write(null, val1);
        theRecordWriter.write(nullWritable, val2);
        theRecordWriter.write(key2, nullWritable);
        theRecordWriter.write(key1, null);
        theRecordWriter.write(null, null);
        theRecordWriter.write(key2, val2);
    } finally {
        theRecordWriter.close(reporter);
    }
    StringBuffer expectedOutput = new StringBuffer();
    expectedOutput.append(key1).append("\t").append(val1).append("\n");
    expectedOutput.append(val1).append("\n");
    expectedOutput.append(val2).append("\n");
    expectedOutput.append(key2).append("\n");
    expectedOutput.append(key1).append("\n");
    expectedOutput.append(key2).append("\t").append(val2).append("\n");
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(job);
    Path expectedFile = new Path(workDir, file + codec.getDefaultExtension());
    final FileInputStream istream = new FileInputStream(expectedFile.toString());
    CompressionInputStream cistream = codec.createInputStream(istream);
    LineReader reader = new LineReader(cistream);
    String output = "";
    Text out = new Text();
    while (reader.readLine(out) > 0) {
        output += out;
        output += "\n";
    }
    reader.close();
    assertEquals(expectedOutput.toString(), output);
}
Also used: Path (org.apache.hadoop.fs.Path), CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream), DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec), Text (org.apache.hadoop.io.Text), NullWritable (org.apache.hadoop.io.NullWritable), FileInputStream (java.io.FileInputStream), FileSystem (org.apache.hadoop.fs.FileSystem), LineReader (org.apache.hadoop.util.LineReader), Test (org.junit.Test)
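Here compression is switched on through the raw COMPRESS property, and the verification half of the test rebuilds a DefaultCodec by hand, both to locate the output file by the codec's ".deflate" default extension and to wrap the raw FileInputStream for decompression. In job setup code the same switches are usually flipped through the FileOutputFormat helpers; a small sketch under that assumption (the class name is illustrative):

import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class CompressedOutputConfigSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Equivalent to the COMPRESS property the test sets to "true".
        FileOutputFormat.setCompressOutput(job, true);
        // Pin the codec explicitly so readers know to expect ".deflate" files.
        FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    }
}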

Example 4 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.

From the class TestReduceTask, method testValueIteratorWithCompression.

@Test
public void testValueIteratorWithCompression() throws Exception {
    Path tmpDir = new Path("build/test/test.reduce.task.compression");
    Configuration conf = new Configuration();
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(conf);
    for (Pair[] testCase : testCases) {
        runValueIterator(tmpDir, testCase, conf, codec);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec), Test (org.junit.Test)
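The new DefaultCodec()/setConf(conf) pair recurs throughout these examples because DefaultCodec implements Configurable and needs a Configuration before it can create compressors. A sketch of the equivalent one-step construction via ReflectionUtils, which calls setConf automatically (the helper class is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecFactorySketch {
    static DefaultCodec newConfiguredCodec(Configuration conf) {
        // newInstance() detects Configurable implementations and invokes
        // setConf(conf), matching the explicit two-step pattern above.
        return ReflectionUtils.newInstance(DefaultCodec.class, conf);
    }
}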

Example 5 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hadoop by apache.

From the class TestIFile, method testIFileReaderWithCodec.

/** Same as above, but create a reader. */
@Test
public void testIFileReaderWithCodec() throws Exception {
    Configuration conf = new Configuration();
    FileSystem localFs = FileSystem.getLocal(conf);
    FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
    Path path = new Path(new Path("build/test.ifile"), "data");
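    // GzipCodec extends DefaultCodec, so a GzipCodec instance can stand in here.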
    DefaultCodec codec = new GzipCodec();
    codec.setConf(conf);
    FSDataOutputStream out = rfs.create(path);
    IFile.Writer<Text, Text> writer = new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class, codec, null);
    writer.close();
    FSDataInputStream in = rfs.open(path);
    IFile.Reader<Text, Text> reader = new IFile.Reader<Text, Text>(conf, in, rfs.getFileStatus(path).getLen(), codec, null);
    reader.close();
    // Test the checksum
    byte[] ab = new byte[100];
    int bytesRead = reader.checksumIn.readWithChecksum(ab, 0, ab.length);
    assertEquals(reader.checksumIn.getChecksum().length, bytesRead);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), GzipCodec (org.apache.hadoop.io.compress.GzipCodec), DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec), Text (org.apache.hadoop.io.Text), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), FileSystem (org.apache.hadoop.fs.FileSystem), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), Test (org.junit.Test)

Aggregations

DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 21
Test (org.junit.Test): 15
Path (org.apache.hadoop.fs.Path): 10
GzipCodec (org.apache.hadoop.io.compress.GzipCodec): 7
Configuration (org.apache.hadoop.conf.Configuration): 6
FileSystem (org.apache.hadoop.fs.FileSystem): 6
Text (org.apache.hadoop.io.Text): 6
BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable): 5
BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable): 5
Writer (org.apache.hadoop.io.SequenceFile.Writer): 4
Random (java.util.Random): 3
LongWritable (org.apache.hadoop.io.LongWritable): 3
Option (org.apache.hadoop.io.SequenceFile.Writer.Option): 3
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 3
RecordReader (org.apache.hadoop.mapred.RecordReader): 3
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 2
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 2
LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem): 2
KeyValueCodec (org.apache.hadoop.hbase.codec.KeyValueCodec): 2
RCFile (org.apache.hadoop.hive.ql.io.RCFile): 2