Use of org.apache.hadoop.io.compress.GzipCodec in project hadoop by apache.
The class TestConcatenatedCompressedInput, method testGzip.
/**
* Test using Hadoop's original, native-zlib gzip codec for reading.
*/
@Test
public void testGzip() throws IOException {
JobConf jobConf = new JobConf(defaultConf);
CompressionCodec gzip = new GzipCodec();
ReflectionUtils.setConf(gzip, jobConf);
localFs.delete(workDir, true);
// alternative:
if (org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor.class == gzip.getDecompressorType()) {
// the built-in (pure-Java) decompressor means the native zlib libs are not loaded
LOG.warn("testGzip() skipped: native (C/C++) libs not loaded");
return;
}
System.out.println(COLOR_BR_RED + "testGzip() using native-zlib Decompressor (" + gzip.getDecompressorType() + ")" + COLOR_NORMAL);
/*
* // THIS IS BUGGY: omits 2nd/3rd gzip headers; screws up 2nd/3rd CRCs--
* // see https://issues.apache.org/jira/browse/HADOOP-6799
* Path fnHDFS = new Path(workDir, "concat" + gzip.getDefaultExtension());
* //OutputStream out = localFs.create(fnHDFS);
* //GzipCodec.GzipOutputStream gzOStm = new GzipCodec.GzipOutputStream(out);
* // can just combine those two lines, probably
* //GzipCodec.GzipOutputStream gzOStm =
* // new GzipCodec.GzipOutputStream(localFs.create(fnHDFS));
* // oops, no: this is a protected helper class; need to access
* // it via createOutputStream() instead:
* OutputStream out = localFs.create(fnHDFS);
* Compressor gzCmp = gzip.createCompressor();
* CompressionOutputStream gzOStm = gzip.createOutputStream(out, gzCmp);
* // this SHOULD be going to HDFS: got out from localFs == HDFS
* // ...yup, works
* gzOStm.write("first gzip concat\n member\nwith three lines\n".getBytes());
* gzOStm.finish();
* gzOStm.resetState();
* gzOStm.write("2nd gzip concat member\n".getBytes());
* gzOStm.finish();
* gzOStm.resetState();
* gzOStm.write("gzip concat\nmember #3\n".getBytes());
* gzOStm.close();
* //
* String fn = "hdfs-to-local-concat" + gzip.getDefaultExtension();
* Path fnLocal = new Path(System.getProperty("test.concat.data","/tmp"), fn);
* localFs.copyToLocalFile(fnHDFS, fnLocal);
*/
// copy prebuilt (correct!) version of concat.gz to HDFS
final String fn = "concat" + gzip.getDefaultExtension();
Path fnLocal = new Path(System.getProperty("test.concat.data", "/tmp"), fn);
Path fnHDFS = new Path(workDir, fn);
localFs.copyFromLocalFile(fnLocal, fnHDFS);
writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip, "this is a test\nof gzip\n");
FileInputFormat.setInputPaths(jobConf, workDir);
TextInputFormat format = new TextInputFormat();
format.configure(jobConf);
InputSplit[] splits = format.getSplits(jobConf, 100);
assertEquals("compressed splits == 2", 2, splits.length);
FileSplit tmp = (FileSplit) splits[0];
if (tmp.getPath().getName().equals("part2.txt.gz")) {
splits[0] = splits[1];
splits[1] = tmp;
}
List<Text> results = readSplit(format, splits[0], jobConf);
assertEquals("splits[0] num lines", 6, results.size());
assertEquals("splits[0][5]", "member #3", results.get(5).toString());
results = readSplit(format, splits[1], jobConf);
assertEquals("splits[1] num lines", 2, results.size());
assertEquals("splits[1][0]", "this is a test", results.get(0).toString());
assertEquals("splits[1][1]", "of gzip", results.get(1).toString());
}
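The commented-out block above is kept because it documents HADOOP-6799: the codec's resetState() path omitted the 2nd/3rd gzip headers and corrupted the CRCs, which is why the test copies a prebuilt concat.gz instead of generating one. For reference, a minimal sketch (not part of the test) of producing a multi-member, concatenated .gz with plain java.util.zip; the class name, output path, and member contents are illustrative only.

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.GZIPOutputStream;

public class ConcatGzipWriter {

  public static void main(String[] args) throws IOException {
    try (OutputStream out = new FileOutputStream("/tmp/concat.gz")) {
      // each member gets its own gzip header and trailer (CRC + length),
      // so decompressors that understand concatenated gzip see three members
      writeMember(out, "first gzip concat\n member\nwith three lines\n");
      writeMember(out, "2nd gzip concat member\n");
      writeMember(out, "gzip concat\nmember #3\n");
    }
  }

  private static void writeMember(OutputStream out, String text) throws IOException {
    GZIPOutputStream member = new GZIPOutputStream(out);
    member.write(text.getBytes("UTF-8"));
    // finish() writes the trailer but leaves the underlying stream open,
    // so the next member can be appended directly after this one
    member.finish();
  }
}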
Use of org.apache.hadoop.io.compress.GzipCodec in project hadoop by apache.
The class TestIFile, method testIFileReaderWithCodec.
/** Same as above but create a reader. */
@Test
public void testIFileReaderWithCodec() throws Exception {
Configuration conf = new Configuration();
FileSystem localFs = FileSystem.getLocal(conf);
FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
Path path = new Path(new Path("build/test.ifile"), "data");
DefaultCodec codec = new GzipCodec();
codec.setConf(conf);
FSDataOutputStream out = rfs.create(path);
IFile.Writer<Text, Text> writer = new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class, codec, null);
writer.close();
FSDataInputStream in = rfs.open(path);
IFile.Reader<Text, Text> reader = new IFile.Reader<Text, Text>(conf, in, rfs.getFileStatus(path).getLen(), codec, null);
reader.close();
// test checksum
byte[] ab = new byte[100];
int bytesRead = reader.checksumIn.readWithChecksum(ab, 0, ab.length);
assertEquals(bytesRead, reader.checksumIn.getChecksum().length);
}
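The test above only round-trips an empty IFile plus the trailing checksum read. Below is a hedged sketch of appending and reading back a record through the same gzip-compressed IFile; it assumes the mapred IFile API also exposes Writer#append(K, V) and Reader#nextRawKey/nextRawValue, an enclosing method that declares throws IOException, and an illustrative path.

// hedged sketch, assuming IFile.Writer#append and IFile.Reader#nextRawKey/nextRawValue
Configuration conf = new Configuration();
FileSystem rfs = ((LocalFileSystem) FileSystem.getLocal(conf)).getRaw();
Path path = new Path("build/test.ifile.records");
DefaultCodec codec = new GzipCodec();   // GzipCodec extends DefaultCodec
codec.setConf(conf);
FSDataOutputStream out = rfs.create(path);
IFile.Writer<Text, Text> writer = new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class, codec, null);
writer.append(new Text("key"), new Text("value"));
writer.close();
FSDataInputStream in = rfs.open(path);
IFile.Reader<Text, Text> reader = new IFile.Reader<Text, Text>(conf, in, rfs.getFileStatus(path).getLen(), codec, null);
DataInputBuffer keyBuf = new DataInputBuffer();
DataInputBuffer valBuf = new DataInputBuffer();
while (reader.nextRawKey(keyBuf)) {
  // keyBuf now wraps the serialized key bytes; nextRawValue fills valBuf the same way
  reader.nextRawValue(valBuf);
}
reader.close();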
Use of org.apache.hadoop.io.compress.GzipCodec in project hbase by apache.
The class TestCellBlockBuilder, method main.
/**
* For running a few tests of methods herein.
* @param args
* @throws IOException
*/
public static void main(String[] args) throws IOException {
int count = 1024;
int size = 10240;
for (String arg : args) {
if (arg.startsWith(COUNT)) {
count = Integer.parseInt(arg.replace(COUNT, ""));
} else if (arg.startsWith(SIZE)) {
size = Integer.parseInt(arg.replace(SIZE, ""));
} else {
usage(1);
}
}
CellBlockBuilder builder = new CellBlockBuilder(HBaseConfiguration.create());
((Log4JLogger) CellBlockBuilder.LOG).getLogger().setLevel(Level.ALL);
timerTests(builder, count, size, new KeyValueCodec(), null);
timerTests(builder, count, size, new KeyValueCodec(), new DefaultCodec());
timerTests(builder, count, size, new KeyValueCodec(), new GzipCodec());
}
Use of org.apache.hadoop.io.compress.GzipCodec in project hbase by apache.
The class TestCellBlockBuilder, method testBuildCellBlock.
@Test
public void testBuildCellBlock() throws IOException {
doBuildCellBlockUndoCellBlock(this.builder, new KeyValueCodec(), null);
doBuildCellBlockUndoCellBlock(this.builder, new KeyValueCodec(), new DefaultCodec());
doBuildCellBlockUndoCellBlock(this.builder, new KeyValueCodec(), new GzipCodec());
}
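doBuildCellBlockUndoCellBlock itself is not shown here. Below is a hedged sketch of the round trip its name implies, assuming CellBlockBuilder exposes buildCellBlock(Codec, CompressionCodec, CellScanner) and createCellScanner(Codec, CompressionCodec, byte[]) as public methods, an enclosing method that declares throws IOException, and illustrative cell contents.

// hedged sketch: encode cells into a cell block, then decode and count them again
CellBlockBuilder builder = new CellBlockBuilder(HBaseConfiguration.create());
Codec codec = new KeyValueCodec();
CompressionCodec compressor = new GzipCodec();
Cell[] cells = new Cell[] {
  new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("value"))
};
// encode: serialize the cells (optionally compressed) into one ByteBuffer
ByteBuffer cellBlock = builder.buildCellBlock(codec, compressor, CellUtil.createCellScanner(cells));
// decode: walk the block back into cells
CellScanner scanner = builder.createCellScanner(codec, compressor, cellBlock.array());
int count = 0;
while (scanner.advance()) {
  count++;
}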
Use of org.apache.hadoop.io.compress.GzipCodec in project carbondata by apache.
The class CSVInputFormatTest, method generateCompressFiles.
/**
* Generates the compressed test files; there is no need to call this method.
* @throws Exception
*/
public void generateCompressFiles() throws Exception {
String pwd = new File("src/test/resources/csv").getCanonicalPath();
String inputFile = pwd + "/data.csv";
FileInputStream input = new FileInputStream(inputFile);
Configuration conf = new Configuration();
// .gz
String outputFile = pwd + "/data.csv.gz";
FileOutputStream output = new FileOutputStream(outputFile);
GzipCodec gzip = new GzipCodec();
gzip.setConf(conf);
CompressionOutputStream outputStream = gzip.createOutputStream(output);
int i = -1;
while ((i = input.read()) != -1) {
outputStream.write(i);
}
outputStream.close();
input.close();
// .bz2
input = new FileInputStream(inputFile);
outputFile = pwd + "/data.csv.bz2";
output = new FileOutputStream(outputFile);
BZip2Codec bzip2 = new BZip2Codec();
bzip2.setConf(conf);
outputStream = bzip2.createOutputStream(output);
i = -1;
while ((i = input.read()) != -1) {
outputStream.write(i);
}
outputStream.close();
input.close();
// .snappy
input = new FileInputStream(inputFile);
outputFile = pwd + "/data.csv.snappy";
output = new FileOutputStream(outputFile);
SnappyCodec snappy = new SnappyCodec();
snappy.setConf(conf);
outputStream = snappy.createOutputStream(output);
i = -1;
while ((i = input.read()) != -1) {
outputStream.write(i);
}
outputStream.close();
input.close();
//.lz4
input = new FileInputStream(inputFile);
outputFile = pwd + "/data.csv.lz4";
output = new FileOutputStream(outputFile);
Lz4Codec lz4 = new Lz4Codec();
lz4.setConf(conf);
outputStream = lz4.createOutputStream(output);
i = -1;
while ((i = input.read()) != -1) {
outputStream.write(i);
}
outputStream.close();
input.close();
}
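The four compression blocks above differ only in the codec and file suffix. Below is a hedged sketch of the same work folded into one helper, replacing the byte-at-a-time loop with org.apache.hadoop.io.IOUtils.copyBytes; the helper name is illustrative.

// hypothetical helper collapsing the repeated per-codec blocks above
private static void compressFile(Configuration conf, String inputFile, CompressionCodec codec, String outputFile) throws IOException {
  if (codec instanceof Configurable) {
    ((Configurable) codec).setConf(conf);
  }
  try (InputStream input = new FileInputStream(inputFile);
       OutputStream output = codec.createOutputStream(new FileOutputStream(outputFile))) {
    // buffered copy instead of one read()/write() call per byte
    IOUtils.copyBytes(input, output, 4096, false);
  }
}

// usage mirroring generateCompressFiles():
// compressFile(conf, inputFile, new GzipCodec(), pwd + "/data.csv.gz");
// compressFile(conf, inputFile, new BZip2Codec(), pwd + "/data.csv.bz2");
// compressFile(conf, inputFile, new SnappyCodec(), pwd + "/data.csv.snappy");
// compressFile(conf, inputFile, new Lz4Codec(), pwd + "/data.csv.lz4");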