Example 1 with ColumnarSerDe

Use of org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe in project presto by prestodb.

From class RcFileTester, method assertFileContentsOld:

private static <K extends LongWritable, V extends BytesRefArrayWritable> void assertFileContentsOld(Type type, TempFile tempFile, Format format, Iterable<?> expectedValues) throws Exception {
    JobConf configuration = new JobConf(new Configuration(false));
    // project only column 0; leave all other columns unread
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);
    Properties schema = new Properties();
    schema.setProperty(META_TABLE_COLUMNS, "test");
    schema.setProperty(META_TABLE_COLUMN_TYPES, getJavaObjectInspector(type).getTypeName());
    Deserializer deserializer;
    if (format == Format.BINARY) {
        // binary RCFile columns are decoded with the lazy-binary columnar SerDe
        deserializer = new LazyBinaryColumnarSerDe();
    } else {
        // text RCFile columns are decoded with ColumnarSerDe
        deserializer = new ColumnarSerDe();
    }
    deserializer.initialize(configuration, schema);
    configuration.set(SERIALIZATION_LIB, deserializer.getClass().getName());
    InputFormat<K, V> inputFormat = new RCFileInputFormat<>();
    RecordReader<K, V> recordReader = inputFormat.getRecordReader(
            new FileSplit(new Path(tempFile.getFile().getAbsolutePath()), 0, tempFile.getFile().length(), (String[]) null),
            configuration,
            NULL);
    K key = recordReader.createKey();
    V value = recordReader.createValue();
    StructObjectInspector rowInspector = (StructObjectInspector) deserializer.getObjectInspector();
    StructField field = rowInspector.getStructFieldRef("test");
    Iterator<?> iterator = expectedValues.iterator();
    while (recordReader.next(key, value)) {
        Object expectedValue = iterator.next();
        Object rowData = deserializer.deserialize(value);
        Object actualValue = rowInspector.getStructFieldData(rowData, field);
        actualValue = decodeRecordReaderValue(type, actualValue);
        assertColumnValueEquals(type, actualValue, expectedValue);
    }
    assertFalse(iterator.hasNext());
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), Properties (java.util.Properties), FileSplit (org.apache.hadoop.mapred.FileSplit), LazyBinaryColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe), ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), RCFileInputFormat (org.apache.hadoop.hive.ql.io.RCFileInputFormat), Deserializer (org.apache.hadoop.hive.serde2.Deserializer), StructObject (org.apache.hadoop.hive.serde2.StructObject), JobConf (org.apache.hadoop.mapred.JobConf), ObjectInspectorFactory.getStandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector), SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
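The two raw configuration keys set at the top are usually expressed through Hive's ColumnProjectionUtils helper. A minimal equivalent sketch (this is not the Presto test's own code; the singleton list mirrors the single projected column "test"):

import java.util.Collections;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.mapred.JobConf;

// Equivalent to setting READ_COLUMN_IDS_CONF_STR and READ_ALL_COLUMNS by hand:
// request column 0 only, leaving all other columns unread.
JobConf configuration = new JobConf(new Configuration(false));
ColumnProjectionUtils.appendReadColumns(configuration, Collections.singletonList(0));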

Example 2 with ColumnarSerDe

Use of org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe in project hive by apache.

From class TestRCFile, method setup:

@Before
public void setup() throws Exception {
    conf = new Configuration();
    ColumnProjectionUtils.setReadAllColumns(conf);
    fs = FileSystem.getLocal(conf);
    dir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred");
    file = new Path(dir, "test_rcfile");
    cleanup();
    // the SerDe part is from TestLazySimpleSerDe
    serDe = new ColumnarSerDe();
    // Create the SerDe
    tbl = createProperties();
    serDe.initialize(conf, tbl, null);
    bytesArray = new byte[][] {
            "123".getBytes(StandardCharsets.UTF_8),
            "456".getBytes(StandardCharsets.UTF_8),
            "789".getBytes(StandardCharsets.UTF_8),
            "1000".getBytes(StandardCharsets.UTF_8),
            "5.3".getBytes(StandardCharsets.UTF_8),
            "hive and hadoop".getBytes(StandardCharsets.UTF_8),
            new byte[0],
            "NULL".getBytes(StandardCharsets.UTF_8) };
    s = new BytesRefArrayWritable(bytesArray.length);
    s.set(0, new BytesRefWritable("123".getBytes(StandardCharsets.UTF_8)));
    s.set(1, new BytesRefWritable("456".getBytes(StandardCharsets.UTF_8)));
    s.set(2, new BytesRefWritable("789".getBytes(StandardCharsets.UTF_8)));
    s.set(3, new BytesRefWritable("1000".getBytes(StandardCharsets.UTF_8)));
    s.set(4, new BytesRefWritable("5.3".getBytes(StandardCharsets.UTF_8)));
    s.set(5, new BytesRefWritable("hive and hadoop".getBytes(StandardCharsets.UTF_8)));
    s.set(6, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8)));
    s.set(7, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8)));
    // partial-read test init ("patialS" is the field's actual name in the test class)
    patialS.set(0, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8)));
    patialS.set(1, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8)));
    patialS.set(2, new BytesRefWritable("789".getBytes(StandardCharsets.UTF_8)));
    patialS.set(3, new BytesRefWritable("1000".getBytes(StandardCharsets.UTF_8)));
    patialS.set(4, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8)));
    // LazyString has no NULL sequence; a missing value is represented by the empty string.
    patialS.set(5, new BytesRefWritable("".getBytes(StandardCharsets.UTF_8)));
    patialS.set(6, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8)));
    // LazyString has no NULL sequence; a missing value is represented by the empty string.
    patialS.set(7, new BytesRefWritable("".getBytes(StandardCharsets.UTF_8)));
    // repeat the row set often enough that the written file crosses at least one sync interval
    numRepeat = (int) Math.ceil((double) SequenceFile.SYNC_INTERVAL / (double) bytesArray.length);
}
Also used: Path (org.apache.hadoop.fs.Path), ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe), Configuration (org.apache.hadoop.conf.Configuration), BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable), Before (org.junit.Before)
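createProperties() is not shown on this page. A plausible sketch of what it sets up, inferred from the eight values written above (the column names and types here are assumptions, not necessarily those of Hive's TestRCFile):

import java.util.Properties;
import org.apache.hadoop.hive.serde.serdeConstants;

// Hypothetical reconstruction: eight columns matching the eight test values.
private static Properties createProperties() {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
            "abyte,ashort,aint,along,adouble,astring,anullint,anullstring");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
            "tinyint:smallint:int:bigint:double:string:int:string");
    // "NULL" in the data above is the text marker for SQL NULL
    tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
    return tbl;
}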

Example 3 with ColumnarSerDe

Use of org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe in project hive by apache.

From class TestStatsSerde, method testColumnarSerDe:

/**
 * Test ColumnarSerDe
 */
@Test
public void testColumnarSerDe() throws Throwable {
    try {
        System.out.println("test: testColumnarSerde");
        // Create the SerDe
        ColumnarSerDe serDe = new ColumnarSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        serDe.initialize(conf, tbl, null);
        // Data
        BytesRefArrayWritable braw = new BytesRefArrayWritable(8);
        String[] data = { "123", "456", "789", "1000", "5.3", "hive and hadoop", "1.", "NULL" };
        for (int i = 0; i < data.length; i++) {
            braw.set(i, new BytesRefWritable(data[i].getBytes(StandardCharsets.UTF_8)));
        }
        // Test
        deserializeAndSerializeColumnar(serDe, braw, data);
        System.out.println("test: testColumnarSerde - OK");
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used: ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe), Configuration (org.apache.hadoop.conf.Configuration), BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), Properties (java.util.Properties), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable), Test (org.junit.Test)
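deserializeAndSerializeColumnar is a private helper of TestStatsSerde and is not shown here; the real version also checks the SerDe's raw-data-size statistics. A minimal sketch of the round-trip shape, under that assumption:

import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import static org.junit.Assert.assertEquals;

// Sketch: deserialize a columnar row, serialize it back, and confirm the
// column count survives the round trip.
private static void roundTripColumnar(ColumnarSerDe serDe, BytesRefArrayWritable braw) throws Exception {
    Object row = serDe.deserialize(braw);
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    BytesRefArrayWritable reserialized = (BytesRefArrayWritable) serDe.serialize(row, oi);
    assertEquals(braw.size(), reserialized.size());
}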

Example 4 with ColumnarSerDe

Use of org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe in project hive by apache.

From class TestLazyBinaryColumnarSerDe, method testLazyBinaryColumnarSerDeWithEmptyBinary (the test exercises LazyBinaryColumnarSerDe; ColumnarSerDe appears as the alternative recommended by its error message):

@Test
public void testLazyBinaryColumnarSerDeWithEmptyBinary() throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
    String cols = ObjectInspectorUtils.getFieldNames(oi);
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    OuterStruct outerStruct = new OuterStruct();
    outerStruct.mByte = 101;
    outerStruct.mShort = 2002;
    outerStruct.mInt = 3003;
    outerStruct.mLong = 4004L;
    outerStruct.mFloat = 5005.01f;
    outerStruct.mDouble = 6006.001d;
    outerStruct.mString = "";
    outerStruct.mBA = new byte[] {};
    outerStruct.mArray = new ArrayList<InnerStruct>();
    outerStruct.mMap = new TreeMap<String, InnerStruct>();
    outerStruct.mStruct = new InnerStruct(180018, 190019L);
    try {
        serde.serialize(outerStruct, oi);
        fail("Expected a RuntimeException for the non-null zero length binary field");
    } catch (RuntimeException re) {
        assertEquals("LazyBinaryColumnarSerde cannot serialize a non-null "
                + "zero length binary field. Consider using either LazyBinarySerde or ColumnarSerde.",
                re.getMessage());
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), Properties (java.util.Properties), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
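The error message names ColumnarSerDe as a workaround. A minimal sketch of that route, reusing props, oi, and outerStruct from the test above (this is an assumption, not part of the test; ColumnarSerDe serializes to text, so whether it suits a given table depends on the storage format):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
import org.apache.hadoop.io.Writable;

// Sketch: the text-based ColumnarSerDe tolerates the zero-length binary
// field that LazyBinaryColumnarSerDe rejects above.
ColumnarSerDe textSerde = new ColumnarSerDe();
textSerde.initialize(new Configuration(), props, null);
Writable serialized = textSerde.serialize(outerStruct, oi);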

Example 5 with ColumnarSerDe

Use of org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe in project hive by apache.

From class TestRCFile, method main:

/**
 * For debugging and testing.
 */
public static void main(String[] args) throws Exception {
    int count = 10000;
    boolean create = true;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Path file = null;
    // the SerDe part is from TestLazySimpleSerDe
    AbstractSerDe serDe = new ColumnarSerDe();
    // Create the SerDe
    Properties tbl = createProperties();
    serDe.initialize(conf, tbl, null);
    String usage = "Usage: RCFile [-count N] file";
    if (args.length == 0) {
        System.err.println(usage);
        System.exit(-1);
    }
    try {
        for (int i = 0; i < args.length; ++i) {
            // parse command line
            if (args[i] == null) {
                continue;
            } else if (args[i].equals("-count")) {
                count = Integer.parseInt(args[++i]);
            } else {
                // file is required parameter
                file = new Path(args[i]);
            }
        }
        if (file == null) {
            System.err.println(usage);
            System.exit(-1);
        }
        LOG.info("count = " + count);
        LOG.info("create = " + create);
        LOG.info("file = " + file);
        TestRCFile test = new TestRCFile();
        // test.performanceTest();
        test.testSimpleReadAndWrite();
        byte[][] bytesArray = new byte[][] {
                "123".getBytes(StandardCharsets.UTF_8),
                "456".getBytes(StandardCharsets.UTF_8),
                "789".getBytes(StandardCharsets.UTF_8),
                "1000".getBytes(StandardCharsets.UTF_8),
                "5.3".getBytes(StandardCharsets.UTF_8),
                "hive and hadoop".getBytes(StandardCharsets.UTF_8),
                new byte[0],
                "NULL".getBytes(StandardCharsets.UTF_8) };
        test.writeTest(fs, count, file, bytesArray);
        test.fullyReadTest(fs, count, file);
        test.partialReadTest(fs, count, file);
        System.out.println("Finished.");
    } finally {
        fs.close();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe), Configuration (org.apache.hadoop.conf.Configuration), FileSystem (org.apache.hadoop.fs.FileSystem), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), Properties (java.util.Properties), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe)
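Run from a shell, the driver takes an optional -count N (default 10000) and a required file path, for example (illustrative path): RCFile -count 10000 /tmp/test_rcfile. With no arguments it prints the usage string and exits.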
