Example 6 with GzipCodec

use of org.apache.hadoop.io.compress.GzipCodec in project hadoop by apache.

the class TestConcatenatedCompressedInput method testGzip.

/**
   * Test using Hadoop's original, native-zlib gzip codec for reading.
   */
@Test
public void testGzip() throws IOException {
    JobConf jobConf = new JobConf(defaultConf);
    CompressionCodec gzip = new GzipCodec();
    ReflectionUtils.setConf(gzip, jobConf);
    localFs.delete(workDir, true);
    // alternative:
    if (org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor.class == gzip.getDecompressorType()) {
        // the pure-Java decompressor was selected, i.e. native zlib is unavailable
        LOG.warn("testGzip() skipped:  native (C/C++) libs not loaded");
        return;
    }
    System.out.println(COLOR_BR_RED + "testGzip() using native-zlib Decompressor (" + gzip.getDecompressorType() + ")" + COLOR_NORMAL);
    /*
 *      // THIS IS BUGGY: omits 2nd/3rd gzip headers; screws up 2nd/3rd CRCs--
 *      //                see https://issues.apache.org/jira/browse/HADOOP-6799
 *  Path fnHDFS = new Path(workDir, "concat" + gzip.getDefaultExtension());
 *  //OutputStream out = localFs.create(fnHDFS);
 *  //GzipCodec.GzipOutputStream gzOStm = new GzipCodec.GzipOutputStream(out);
 *      // can just combine those two lines, probably
 *  //GzipCodec.GzipOutputStream gzOStm =
 *  //  new GzipCodec.GzipOutputStream(localFs.create(fnHDFS));
 *      // oops, no:  this is a protected helper class; need to access
 *      //   it via createOutputStream() instead:
 *  OutputStream out = localFs.create(fnHDFS);
 *  Compressor gzCmp = gzip.createCompressor();
 *  CompressionOutputStream gzOStm = gzip.createOutputStream(out, gzCmp);
 *      // this SHOULD be going to HDFS:  got out from localFs == HDFS
 *      //   ...yup, works
 *  gzOStm.write("first gzip concat\n member\nwith three lines\n".getBytes());
 *  gzOStm.finish();
 *  gzOStm.resetState();
 *  gzOStm.write("2nd gzip concat member\n".getBytes());
 *  gzOStm.finish();
 *  gzOStm.resetState();
 *  gzOStm.write("gzip concat\nmember #3\n".getBytes());
 *  gzOStm.close();
 *      //
 *  String fn = "hdfs-to-local-concat" + gzip.getDefaultExtension();
 *  Path fnLocal = new Path(System.getProperty("test.concat.data","/tmp"), fn);
 *  localFs.copyToLocalFile(fnHDFS, fnLocal);
 */
    // copy prebuilt (correct!) version of concat.gz to HDFS
    final String fn = "concat" + gzip.getDefaultExtension();
    Path fnLocal = new Path(System.getProperty("test.concat.data", "/tmp"), fn);
    Path fnHDFS = new Path(workDir, fn);
    localFs.copyFromLocalFile(fnLocal, fnHDFS);
    writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip, "this is a test\nof gzip\n");
    FileInputFormat.setInputPaths(jobConf, workDir);
    TextInputFormat format = new TextInputFormat();
    format.configure(jobConf);
    InputSplit[] splits = format.getSplits(jobConf, 100);
    assertEquals("compressed splits == 2", 2, splits.length);
    FileSplit tmp = (FileSplit) splits[0];
    if (tmp.getPath().getName().equals("part2.txt.gz")) {
        splits[0] = splits[1];
        splits[1] = tmp;
    }
    List<Text> results = readSplit(format, splits[0], jobConf);
    assertEquals("splits[0] num lines", 6, results.size());
    assertEquals("splits[0][5]", "member #3", results.get(5).toString());
    results = readSplit(format, splits[1], jobConf);
    assertEquals("splits[1] num lines", 2, results.size());
    assertEquals("splits[1][0]", "this is a test", results.get(0).toString());
    assertEquals("splits[1][1]", "of gzip", results.get(1).toString());
}
Also used: Path (org.apache.hadoop.fs.Path), GzipCodec (org.apache.hadoop.io.compress.GzipCodec), Text (org.apache.hadoop.io.Text), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec), Test (org.junit.Test)
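
For contrast with the buggy resetState() sequence commented out above (HADOOP-6799), here is a minimal sketch of how a valid multi-member concat.gz can be produced with plain java.util.zip: each member gets its own GZIPOutputStream, and therefore its own gzip header and CRC. The ConcatGzipWriter class and the /tmp output path are hypothetical, not part of the Hadoop test.

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPOutputStream;

public class ConcatGzipWriter {
    /** Writes each member with a fresh GZIPOutputStream so every member
     *  gets its own gzip header, CRC, and length trailer. */
    public static void writeMembers(String path, String... members) throws IOException {
        try (OutputStream raw = new FileOutputStream(path)) {
            for (String member : members) {
                GZIPOutputStream gz = new GZIPOutputStream(raw);
                gz.write(member.getBytes(StandardCharsets.UTF_8));
                gz.finish(); // writes this member's trailer without closing 'raw'
            }
        }
    }

    public static void main(String[] args) throws IOException {
        writeMembers("/tmp/concat.gz",
                "first gzip concat\n member\nwith three lines\n",
                "2nd gzip concat member\n",
                "gzip concat\nmember #3\n");
    }
}

Decompressing the result (e.g. gunzip -c) yields all six lines in order, which is exactly what the splits[0] assertions above check against the prebuilt concat.gz.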

Example 7 with GzipCodec

use of org.apache.hadoop.io.compress.GzipCodec in project hadoop by apache.

the class TestIFile method testIFileReaderWithCodec.

/** Same as above but create a reader. */
@Test
public void testIFileReaderWithCodec() throws Exception {
    Configuration conf = new Configuration();
    FileSystem localFs = FileSystem.getLocal(conf);
    FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
    Path path = new Path(new Path("build/test.ifile"), "data");
    DefaultCodec codec = new GzipCodec();
    codec.setConf(conf);
    FSDataOutputStream out = rfs.create(path);
    IFile.Writer<Text, Text> writer = new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class, codec, null);
    writer.close();
    FSDataInputStream in = rfs.open(path);
    IFile.Reader<Text, Text> reader = new IFile.Reader<Text, Text>(conf, in, rfs.getFileStatus(path).getLen(), codec, null);
    reader.close();
    // test checksum
    byte[] ab = new byte[100];
    int bytesRead = reader.checksumIn.readWithChecksum(ab, 0, ab.length);
    assertEquals(bytesRead, reader.checksumIn.getChecksum().length);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), GzipCodec (org.apache.hadoop.io.compress.GzipCodec), DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec), Text (org.apache.hadoop.io.Text), FileSystem (org.apache.hadoop.fs.FileSystem), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), Test (org.junit.Test)
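
The test above only opens and closes an empty writer/reader pair; the codec plumbing it relies on is easier to see in a standalone round trip through the codec's stream factories. A minimal sketch, with the class name and sample text being ours rather than anything in TestIFile:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.GzipCodec;

public class GzipRoundTrip {
    public static void main(String[] args) throws IOException {
        GzipCodec codec = new GzipCodec();
        codec.setConf(new Configuration()); // codecs are Configurable; set a conf before use

        // compress a small payload into memory
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(bytes);
        out.write("hello, gzip".getBytes(StandardCharsets.UTF_8));
        out.close();

        // decompress it back through the same codec
        CompressionInputStream in =
                codec.createInputStream(new ByteArrayInputStream(bytes.toByteArray()));
        byte[] buf = new byte[64];
        int n = in.read(buf);
        in.close();
        System.out.println(new String(buf, 0, n, StandardCharsets.UTF_8)); // hello, gzip
    }
}

Note that the declared type in the test is DefaultCodec even though the instance is a GzipCodec; that compiles because GzipCodec extends DefaultCodec.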

Example 8 with GzipCodec

use of org.apache.hadoop.io.compress.GzipCodec in project hbase by apache.

the class TestCellBlockBuilder method main.

/**
   * For running a few tests of the methods herein.
   * @param args optional count and size flags; any other argument prints usage and exits
   * @throws IOException if a cell block cannot be built
   */
public static void main(String[] args) throws IOException {
    int count = 1024;
    int size = 10240;
    for (String arg : args) {
        if (arg.startsWith(COUNT)) {
            count = Integer.parseInt(arg.replace(COUNT, ""));
        } else if (arg.startsWith(SIZE)) {
            size = Integer.parseInt(arg.replace(SIZE, ""));
        } else {
            usage(1);
        }
    }
    CellBlockBuilder builder = new CellBlockBuilder(HBaseConfiguration.create());
    ((Log4JLogger) CellBlockBuilder.LOG).getLogger().setLevel(Level.ALL);
    timerTests(builder, count, size, new KeyValueCodec(), null);
    timerTests(builder, count, size, new KeyValueCodec(), new DefaultCodec());
    timerTests(builder, count, size, new KeyValueCodec(), new GzipCodec());
}
Also used: KeyValueCodec (org.apache.hadoop.hbase.codec.KeyValueCodec), GzipCodec (org.apache.hadoop.io.compress.GzipCodec), DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec)
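
COUNT and SIZE are flag-prefix constants defined elsewhere in TestCellBlockBuilder, so each argument is expected as a prefix followed by an integer, and anything else falls through to usage(1). Assuming prefixes of the conventional form (our assumption; the excerpt does not show them), a run might look like this:

    // assumed definitions, not shown in the excerpt above
    private static final String COUNT = "--count=";
    private static final String SIZE = "--size=";

    // hypothetical invocation: 2048 cell blocks of 4096 bytes each
    //   java ... TestCellBlockBuilder --count=2048 --size=4096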

Example 9 with GzipCodec

use of org.apache.hadoop.io.compress.GzipCodec in project hbase by apache.

the class TestCellBlockBuilder method testBuildCellBlock.

@Test
public void testBuildCellBlock() throws IOException {
    doBuildCellBlockUndoCellBlock(this.builder, new KeyValueCodec(), null);
    doBuildCellBlockUndoCellBlock(this.builder, new KeyValueCodec(), new DefaultCodec());
    doBuildCellBlockUndoCellBlock(this.builder, new KeyValueCodec(), new GzipCodec());
}
Also used: KeyValueCodec (org.apache.hadoop.hbase.codec.KeyValueCodec), GzipCodec (org.apache.hadoop.io.compress.GzipCodec), DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec), Test (org.junit.Test)

Example 10 with GzipCodec

use of org.apache.hadoop.io.compress.GzipCodec in project carbondata by apache.

the class CSVInputFormatTest method generateCompressFiles.

/**
   * Generates the compressed test input files. This does not need to be called
   * during normal test runs; it exists only to (re)create the resources.
   * @throws Exception if any input or output file cannot be processed
   */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources/csv").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    FileInputStream input = new FileInputStream(inputFile);
    Configuration conf = new Configuration();
    // .gz
    String outputFile = pwd + "/data.csv.gz";
    FileOutputStream output = new FileOutputStream(outputFile);
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    CompressionOutputStream outputStream = gzip.createOutputStream(output);
    int i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();
    // .bz2
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.bz2";
    output = new FileOutputStream(outputFile);
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    outputStream = bzip2.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();
    // .snappy
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.snappy";
    output = new FileOutputStream(outputFile);
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    outputStream = snappy.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();
    // .lz4
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.lz4";
    output = new FileOutputStream(outputFile);
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    outputStream = lz4.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();
}
Also used: Lz4Codec (org.apache.hadoop.io.compress.Lz4Codec), CompressionOutputStream (org.apache.hadoop.io.compress.CompressionOutputStream), Configuration (org.apache.hadoop.conf.Configuration), FileOutputStream (java.io.FileOutputStream), GzipCodec (org.apache.hadoop.io.compress.GzipCodec), BZip2Codec (org.apache.hadoop.io.compress.BZip2Codec), File (java.io.File), SnappyCodec (org.apache.hadoop.io.compress.SnappyCodec), FileInputStream (java.io.FileInputStream)
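
The four copy loops above differ only in the codec and the output extension, and the byte-at-a-time read is slow. Below is a hedged refactoring sketch using Hadoop's IOUtils.copyBytes; the helper class and method names, the buffer size, and the Configurable cast are our choices, not CarbonData code:

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;

public class CompressHelper {
    /** Hypothetical helper: one buffered copy instead of four hand-rolled loops. */
    static void compressFile(CompressionCodec codec, Configuration conf, String src, String dst)
            throws IOException {
        ((Configurable) codec).setConf(conf); // Gzip/BZip2/Snappy/Lz4 codecs all implement Configurable
        try (FileInputStream in = new FileInputStream(src)) {
            CompressionOutputStream out = codec.createOutputStream(new FileOutputStream(dst));
            try {
                IOUtils.copyBytes(in, out, 4096); // buffered copy; this overload does not close the streams
            } finally {
                out.close();
            }
        }
    }
}

With that helper, generateCompressFiles() collapses to four calls such as compressFile(new GzipCodec(), conf, inputFile, pwd + "/data.csv.gz"). Note that SnappyCodec and Lz4Codec typically require the corresponding native libraries to be loadable at runtime.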

Aggregations

GzipCodec (org.apache.hadoop.io.compress.GzipCodec): 15
Test (org.junit.Test): 13
Path (org.apache.hadoop.fs.Path): 12
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 7
Text (org.apache.hadoop.io.Text): 6
Writer (org.apache.hadoop.io.SequenceFile.Writer): 5
Option (org.apache.hadoop.io.SequenceFile.Writer.Option): 5
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 5
Configuration (org.apache.hadoop.conf.Configuration): 4
FileInputStream (java.io.FileInputStream): 3
FileSystem (org.apache.hadoop.fs.FileSystem): 2
LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem): 2
KeyValueCodec (org.apache.hadoop.hbase.codec.KeyValueCodec): 2
File (java.io.File): 1
FileOutputStream (java.io.FileOutputStream): 1
IOException (java.io.IOException): 1
Inflater (java.util.zip.Inflater): 1
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 1
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 1
Reader (org.apache.hadoop.io.SequenceFile.Reader): 1