Example 6 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hbase by apache.

Class TestCellBlockBuilder, method main.

/**
   * For running a few tests of methods herein.
   * @param args
   * @throws IOException
   */
public static void main(String[] args) throws IOException {
    int count = 1024;
    int size = 10240;
    for (String arg : args) {
        if (arg.startsWith(COUNT)) {
            count = Integer.parseInt(arg.replace(COUNT, ""));
        } else if (arg.startsWith(SIZE)) {
            size = Integer.parseInt(arg.replace(SIZE, ""));
        } else {
            usage(1);
        }
    }
    CellBlockBuilder builder = new CellBlockBuilder(HBaseConfiguration.create());
    ((Log4JLogger) CellBlockBuilder.LOG).getLogger().setLevel(Level.ALL);
    timerTests(builder, count, size, new KeyValueCodec(), null);
    timerTests(builder, count, size, new KeyValueCodec(), new DefaultCodec());
    timerTests(builder, count, size, new KeyValueCodec(), new GzipCodec());
}
Also used : KeyValueCodec(org.apache.hadoop.hbase.codec.KeyValueCodec) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec)
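
For context, DefaultCodec and GzipCodec are interchangeable here because the cell-block code only relies on Hadoop's CompressionCodec stream interface. Below is a minimal, self-contained round-trip sketch of that interface; it is not taken from the HBase tests, and the class name and payload are made up for illustration.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

public class DefaultCodecRoundTrip {

    public static void main(String[] args) throws Exception {
        byte[] original = "some bytes worth compressing".getBytes(StandardCharsets.UTF_8);

        // DefaultCodec is Configurable; give it a Configuration before use.
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(new Configuration());

        // Compress into an in-memory buffer.
        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(compressed);
        out.write(original);
        out.close();

        // Decompress and copy the bytes back out.
        CompressionInputStream in =
                codec.createInputStream(new ByteArrayInputStream(compressed.toByteArray()));
        ByteArrayOutputStream restored = new ByteArrayOutputStream();
        IOUtils.copyBytes(in, restored, 4096, true);

        System.out.println(new String(restored.toByteArray(), StandardCharsets.UTF_8));
    }
}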

Example 7 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hbase by apache.

Class TestCellBlockBuilder, method testBuildCellBlock.

@Test
public void testBuildCellBlock() throws IOException {
    doBuildCellBlockUndoCellBlock(this.builder, new KeyValueCodec(), null);
    doBuildCellBlockUndoCellBlock(this.builder, new KeyValueCodec(), new DefaultCodec());
    doBuildCellBlockUndoCellBlock(this.builder, new KeyValueCodec(), new GzipCodec());
}
Also used : KeyValueCodec(org.apache.hadoop.hbase.codec.KeyValueCodec) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) Test(org.junit.Test)

Example 8 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hive by apache.

Class TestRCFile, method writeTest.

private void writeTest(FileSystem fs, int count, Path file, byte[][] fieldsData, Configuration conf) throws IOException, SerDeException {
    cleanup();
    RCFileOutputFormat.setColumnNumber(conf, fieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
    for (int i = 0; i < fieldsData.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < count; i++) {
        writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(file).getLen();
    System.out.println("The file size of RCFile with " + bytes.size() + " number columns and " + count + " number rows is " + fileLen);
}
Also used : BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
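
A complementary sketch of verifying what writeTest produced, assuming the same fs, file, and conf variables that were passed to it; it only uses the RCFile.Reader calls that also appear in the next example.

// Read back the file produced by writeTest and count the rows.
// Assumes the same fs, file, and conf that were passed to writeTest above.
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
BytesRefArrayWritable cols = new BytesRefArrayWritable();
int rows = 0;
while (reader.next(rowID)) {
    reader.getCurrentRow(cols);
    rows++;
}
reader.close();
System.out.println("Read back " + rows + " rows");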

Example 9 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hive by apache.

Class TestRCFile, method testSimpleReadAndWrite.

@Test
public void testSimpleReadAndWrite() throws IOException, SerDeException {
    cleanup();
    byte[][] record_1 = { "123".getBytes("UTF-8"), "456".getBytes("UTF-8"), "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"), new byte[0], "NULL".getBytes("UTF-8") };
    byte[][] record_2 = { "100".getBytes("UTF-8"), "200".getBytes("UTF-8"), "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"), new byte[0], "NULL".getBytes("UTF-8") };
    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")), new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();
    Object[] expectedRecord_1 = { new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
    Object[] expectedRecord_2 = { new ByteWritable((byte) 100), new ShortWritable((short) 200), new IntWritable(123), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));
    LongWritable rowID = new LongWritable();
    for (int i = 0; i < 2; i++) {
        reader.next(rowID);
        BytesRefArrayWritable cols = new BytesRefArrayWritable();
        reader.getCurrentRow(cols);
        cols.resetValid(8);
        Object row = serDe.deserialize(cols);
        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        assertEquals("Field size should be 8", 8, fieldRefs.size());
        for (int j = 0; j < fieldRefs.size(); j++) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
            Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData, fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
            if (i == 0) {
                assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
            } else {
                assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
            }
        }
    }
    reader.close();
}
Also used : BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) RecordReader(org.apache.hadoop.mapred.RecordReader) Text(org.apache.hadoop.io.Text) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)
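
The SerDe round trip above is the usual verification path. If only the raw column bytes are needed, the BytesRefArrayWritable can be inspected directly; a minimal sketch, assuming an open reader as in the test above and the accessor methods of Hive's BytesRefWritable.

// Dump the raw column bytes of one row without going through the SerDe.
// Assumes `reader` is an open RCFile.Reader, as in the test above.
LongWritable rowID = new LongWritable();
if (reader.next(rowID)) {
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    for (int c = 0; c < cols.size(); c++) {
        BytesRefWritable ref = cols.get(c);
        String value = new String(ref.getData(), ref.getStart(), ref.getLength(), "UTF-8");
        System.out.println("column " + c + " = " + value);
    }
}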

Example 10 with DefaultCodec

Use of org.apache.hadoop.io.compress.DefaultCodec in project hive by apache.

Class PerformTestRCFileAndSeqFile, method main.

public static void main(String[] args) throws Exception {
    int count = 1000;
    String file = null;
    try {
        for (int i = 0; i < args.length; ++i) {
            // parse command line
            if (args[i] == null) {
                continue;
            } else if (args[i].equals("-count")) {
                count = Integer.parseInt(args[++i]);
            } else {
                // file is required parameter
                file = args[i];
            }
        }
        // change it to choose the appropriate file system
        boolean isLocalFS = true;
        PerformTestRCFileAndSeqFile testcase = new PerformTestRCFileAndSeqFile(isLocalFS, file);
        // change these parameters
        boolean checkCorrect = true;
        CompressionCodec codec = new DefaultCodec();
        testcase.columnMaxSize = 30;
        // testcase.testWithColumnNumber(count, 2, checkCorrect, codec);
        // testcase.testWithColumnNumber(count, 10, checkCorrect, codec);
        // testcase.testWithColumnNumber(count, 25, checkCorrect, codec);
        testcase.testWithColumnNumber(count, 40, checkCorrect, codec);
        // testcase.testWithColumnNumber(count, 50, checkCorrect, codec);
        // testcase.testWithColumnNumber(count, 80, checkCorrect, codec);
    } finally {
    }
}
Also used : DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
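
The benchmark hardcodes DefaultCodec. One small variation, sketched below, resolves the codec by name through Hadoop's CompressionCodecFactory instead; the codec name (and any -codec flag it might come from) is an assumption, not part of the original test.

// Resolve the codec from a name instead of hardcoding DefaultCodec.
// The name (e.g. "gzip" or "default") would come from a hypothetical -codec flag.
Configuration conf = new Configuration();
CompressionCodecFactory factory = new CompressionCodecFactory(conf);
CompressionCodec codec = factory.getCodecByName("gzip");
if (codec == null) {
    // Fall back to the DEFLATE-based DefaultCodec used above.
    codec = new DefaultCodec();
}
testcase.testWithColumnNumber(count, 40, checkCorrect, codec);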

Aggregations

DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 21 usages
Test (org.junit.Test): 15 usages
Path (org.apache.hadoop.fs.Path): 10 usages
GzipCodec (org.apache.hadoop.io.compress.GzipCodec): 7 usages
Configuration (org.apache.hadoop.conf.Configuration): 6 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 6 usages
Text (org.apache.hadoop.io.Text): 6 usages
BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable): 5 usages
BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable): 5 usages
Writer (org.apache.hadoop.io.SequenceFile.Writer): 4 usages
Random (java.util.Random): 3 usages
LongWritable (org.apache.hadoop.io.LongWritable): 3 usages
Option (org.apache.hadoop.io.SequenceFile.Writer.Option): 3 usages
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 3 usages
RecordReader (org.apache.hadoop.mapred.RecordReader): 3 usages
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 2 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 2 usages
LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem): 2 usages
KeyValueCodec (org.apache.hadoop.hbase.codec.KeyValueCodec): 2 usages
RCFile (org.apache.hadoop.hive.ql.io.RCFile): 2 usages