Example 1 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.

From the class TestRCFile, method writeTest:

private void writeTest(FileSystem fs, int count, Path file, byte[][] fieldsData, Configuration conf) throws IOException, SerDeException {
    cleanup();
    RCFileOutputFormat.setColumnNumber(conf, fieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
    for (int i = 0; i < fieldsData.length; i++) {
        // Each column of the row wraps the corresponding field's bytes.
        BytesRefWritable cu = new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length);
        bytes.set(i, cu);
    }
    // Append the same row 'count' times.
    for (int i = 0; i < count; i++) {
        writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(file).getLen();
    System.out.println("The file size of RCFile with " + bytes.size() + " number columns and " + count + " number rows is " + fileLen);
}
Also used: BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
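For orientation, here is a hedged sketch of how a helper like writeTest might be invoked from a test in the same class. This is not from the Hive source; the row count, column data, and file name are illustrative assumptions, and fs/conf are taken to be the fields initialized in setup().

// Hypothetical caller of writeTest (same test class): write 100 identical
// three-column rows and let writeTest print the resulting file size.
@Test
public void writeTestSketch() throws IOException, SerDeException {
    Path target = new Path(System.getProperty("test.tmp.dir", "."), "write_test_rcfile");
    byte[][] fieldsData = {
        "123".getBytes("UTF-8"),
        "hive and hadoop".getBytes("UTF-8"),
        new byte[0]
    };
    writeTest(fs, 100, target, fieldsData, conf);
}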

Example 2 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.

From the class TestRCFile, method testSimpleReadAndWrite:

@Test
public void testSimpleReadAndWrite() throws IOException, SerDeException {
    cleanup();
    byte[][] record_1 = { "123".getBytes("UTF-8"), "456".getBytes("UTF-8"), "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"), new byte[0], "NULL".getBytes("UTF-8") };
    byte[][] record_2 = { "100".getBytes("UTF-8"), "200".getBytes("UTF-8"), "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"), new byte[0], "NULL".getBytes("UTF-8") };
    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")), new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();
    Object[] expectedRecord_1 = { new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
    Object[] expectedRecord_2 = { new ByteWritable((byte) 100), new ShortWritable((short) 200), new IntWritable(123), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));
    LongWritable rowID = new LongWritable();
    for (int i = 0; i < 2; i++) {
        reader.next(rowID);
        BytesRefArrayWritable cols = new BytesRefArrayWritable();
        reader.getCurrentRow(cols);
        cols.resetValid(8);
        Object row = serDe.deserialize(cols);
        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        assertEquals("Field size should be 8", 8, fieldRefs.size());
        for (int j = 0; j < fieldRefs.size(); j++) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
            Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData, fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
            if (i == 0) {
                assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
            } else {
                assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
            }
        }
    }
    reader.close();
}
Also used: BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec), RecordReader (org.apache.hadoop.mapred.RecordReader), Text (org.apache.hadoop.io.Text), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), LongWritable (org.apache.hadoop.io.LongWritable), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), IntWritable (org.apache.hadoop.io.IntWritable), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
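testSimpleReadAndWrite reads all eight columns back. Since column pruning is RCFile's main benefit, a companion sketch may help: it reads only a subset of columns, assuming the same fs, conf, and file fields as above. The projected indexes 2 and 3 are purely illustrative.

// Project only columns 2 and 3 before opening the reader; RCFile can then
// skip the unread columns entirely. Indexes chosen for illustration only.
ColumnProjectionUtils.appendReadColumns(conf, java.util.Arrays.asList(2, 3));
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
while (reader.next(rowID)) {
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    // Only the projected columns carry data; the rest stay empty.
}
reader.close();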

Example 3 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.

From the class TestRCFile, method setup:

@Before
public void setup() throws Exception {
    conf = new Configuration();
    ColumnProjectionUtils.setReadAllColumns(conf);
    fs = FileSystem.getLocal(conf);
    dir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred");
    file = new Path(dir, "test_rcfile");
    cleanup();
    // the SerDe part is from TestLazySimpleSerDe
    serDe = new ColumnarSerDe();
    // Create the SerDe
    tbl = createProperties();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
    try {
        bytesArray = new byte[][] { "123".getBytes("UTF-8"), "456".getBytes("UTF-8"), "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"), new byte[0], "NULL".getBytes("UTF-8") };
        s = new BytesRefArrayWritable(bytesArray.length);
        s.set(0, new BytesRefWritable("123".getBytes("UTF-8")));
        s.set(1, new BytesRefWritable("456".getBytes("UTF-8")));
        s.set(2, new BytesRefWritable("789".getBytes("UTF-8")));
        s.set(3, new BytesRefWritable("1000".getBytes("UTF-8")));
        s.set(4, new BytesRefWritable("5.3".getBytes("UTF-8")));
        s.set(5, new BytesRefWritable("hive and hadoop".getBytes("UTF-8")));
        s.set(6, new BytesRefWritable("NULL".getBytes("UTF-8")));
        s.set(7, new BytesRefWritable("NULL".getBytes("UTF-8")));
        // Partial-column test data; 'patialS' is a BytesRefArrayWritable field of the test class (the misspelled name matches the original source).
        patialS.set(0, new BytesRefWritable("NULL".getBytes("UTF-8")));
        patialS.set(1, new BytesRefWritable("NULL".getBytes("UTF-8")));
        patialS.set(2, new BytesRefWritable("789".getBytes("UTF-8")));
        patialS.set(3, new BytesRefWritable("1000".getBytes("UTF-8")));
        patialS.set(4, new BytesRefWritable("NULL".getBytes("UTF-8")));
        // LazyString has no NULL sequence; a missing value is represented by an empty string.
        patialS.set(5, new BytesRefWritable("".getBytes("UTF-8")));
        patialS.set(6, new BytesRefWritable("NULL".getBytes("UTF-8")));
        // LazyString has no NULL sequence; a missing value is represented by an empty string.
        patialS.set(7, new BytesRefWritable("".getBytes("UTF-8")));
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException(e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe), Configuration (org.apache.hadoop.conf.Configuration), BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), UnsupportedEncodingException (java.io.UnsupportedEncodingException), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable), Before (org.junit.Before)
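The createProperties() helper called in setup() is not part of this listing. Below is a plausible reconstruction based on the standard ColumnarSerDe table properties; the column names and the exact type list are assumptions chosen to match the eight test fields.

// Hypothetical reconstruction of createProperties(): ColumnarSerDe takes
// its schema from the standard "columns" / "columns.types" properties.
private Properties createProperties() {
    Properties tbl = new Properties();
    // Eight columns matching the test data: numeric fields, a string,
    // and two trailing fields that exercise NULL handling.
    tbl.setProperty("columns", "abyte,ashort,aint,along,adouble,astring,anullint,anullstring");
    tbl.setProperty("columns.types", "tinyint:smallint:int:bigint:double:string:int:string");
    tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
    return tbl;
}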

Example 4 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.

From the class PerformTestRCFileAndSeqFile, method writeRCFileTest:

private void writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum, CompressionCodec codec) throws IOException {
    fs.delete(file, true);
    resetRandomGenerators();
    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
    byte[][] columnRandom = new byte[columnNum][];
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    // Pre-allocate one reusable BytesRefWritable per column; nextRandomRow
    // repoints each at fresh random data for every row.
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }
    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes);
        writer.append(bytes);
    }
    writer.close();
}
Also used: BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
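The nextRandomRow and resetRandomGenerators helpers are not shown in this listing. Here is a hedged sketch of what nextRandomRow needs to do, reusing the pre-allocated BytesRefWritable entries; the random column width and the generator field are assumptions, not the actual Hive helpers.

// Hypothetical sketch of nextRandomRow: generate fresh random bytes per
// column and repoint the pre-allocated BytesRefWritable entries at them.
private final java.util.Random colDataGenerator = new java.util.Random();

private void nextRandomRow(byte[][] row, BytesRefArrayWritable bytes) {
    for (int i = 0; i < row.length; i++) {
        int len = 1 + colDataGenerator.nextInt(30);  // column width, assumed
        row[i] = new byte[len];
        colDataGenerator.nextBytes(row[i]);
        bytes.get(i).set(row[i], 0, row[i].length);
    }
}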

Example 5 with BytesRefWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.

From the class TestStatsSerde, method testColumnarSerDe:

/**
   * Test ColumnarSerDe
   */
public void testColumnarSerDe() throws Throwable {
    try {
        System.out.println("test: testColumnarSerde");
        // Create the SerDe
        ColumnarSerDe serDe = new ColumnarSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
        // Data
        BytesRefArrayWritable braw = new BytesRefArrayWritable(8);
        String[] data = { "123", "456", "789", "1000", "5.3", "hive and hadoop", "1.", "NULL" };
        for (int i = 0; i < 8; i++) {
            // Note: getBytes() uses the platform default charset here,
            // unlike the explicit UTF-8 calls in the other examples.
            braw.set(i, new BytesRefWritable(data[i].getBytes()));
        }
        // Test
        deserializeAndSerializeColumnar(serDe, braw, data);
        System.out.println("test: testColumnarSerde - OK");
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used: ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe), Configuration (org.apache.hadoop.conf.Configuration), BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), Properties (java.util.Properties), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
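deserializeAndSerializeColumnar is also outside this listing. Given the class name, it presumably round-trips the row and checks the SerDe statistics; the following is a minimal sketch under that assumption, and the specific assertions are guesses rather than the actual test logic.

// Hypothetical sketch of the round trip: deserialize the columnar row,
// serialize it back, then inspect the SerDe's raw-data-size statistics.
private void deserializeAndSerializeColumnar(ColumnarSerDe serDe,
        BytesRefArrayWritable braw, String[] data) throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    Object row = serDe.deserialize(braw);
    assertEquals(data.length, oi.getAllStructFieldRefs().size());
    BytesRefArrayWritable serialized = (BytesRefArrayWritable) serDe.serialize(row, oi);
    assertEquals(data.length, serialized.size());
    // TestStatsSerde presumably verifies the stats reported after the pass.
    assertTrue(serDe.getSerDeStats().getRawDataSize() > 0);
}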

Aggregations

BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable): 28 uses
IOException (java.io.IOException): 14 uses
BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable): 14 uses
Configuration (org.apache.hadoop.conf.Configuration): 6 uses
Path (org.apache.hadoop.fs.Path): 5 uses
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 5 uses
LongWritable (org.apache.hadoop.io.LongWritable): 4 uses
RecordReader (org.apache.hadoop.mapred.RecordReader): 4 uses
Test (org.junit.Test): 4 uses
Random (java.util.Random): 2 uses
RCFile (org.apache.hadoop.hive.ql.io.RCFile): 2 uses
ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe): 2 uses
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 2 uses
Text (org.apache.hadoop.io.Text): 2 uses
InputSplit (org.apache.hadoop.mapred.InputSplit): 2 uses
JobConf (org.apache.hadoop.mapred.JobConf): 2 uses
ColumnEntry (com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry): 1 use
PrestoException (com.facebook.presto.spi.PrestoException): 1 use
FileWriter (java.io.FileWriter): 1 use
PrintWriter (java.io.PrintWriter): 1 use