
Example 1 with SerDe

Use of org.apache.hadoop.hive.serde2.SerDe in the apache/hive project.

From class TestInputOutputFormat, method testMROutput.

@Test
public void testMROutput() throws Exception {
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    AbstractSerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
    writer.close(Reporter.NULL);
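    // Re-initialize the SerDe for the read side with an explicit table schema.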
    serde = new OrcSerde();
    properties.setProperty("columns", "z,r");
    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    SerDeUtils.initializeSerDe(serde, conf, properties, null);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
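    // Materialize only the nested struct column (index 1) on the read path.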
    ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));
    conf.set("columns", "z,r");
    conf.set("columns.types", "int:struct<x:int,y:int>");
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StructObjectInspector inner = (StructObjectInspector) fields.get(1).getFieldObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    IntObjectInspector intInspector = (IntObjectInspector) fields.get(0).getFieldObjectInspector();
    while (reader.next(key, value)) {
        assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
        Object sub = inspector.getStructFieldData(value, fields.get(1));
        assertEquals(3 * rowNum + 1, intInspector.get(inner.getStructFieldData(sub, inFields.get(0))));
        assertEquals(3 * rowNum + 2, intInspector.get(inner.getStructFieldData(sub, inFields.get(1))));
        rowNum += 1;
    }
    assertEquals(3, rowNum);
    reader.close();
}
Also used: IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector), Properties (java.util.Properties), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), RecordWriter (org.apache.hadoop.mapred.RecordWriter), InputSplit (org.apache.hadoop.mapred.InputSplit), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
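
The read side of this test hinges on two easily missed steps: re-initializing the SerDe with an explicit table schema, and projecting the single column that should actually be read. A minimal sketch of just that configuration step, assuming a Configuration named conf and the same classes listed above; variable names are illustrative:

Properties readProps = new Properties();
readProps.setProperty("columns", "z,r");
readProps.setProperty("columns.types", "int:struct<x:int,y:int>");

AbstractSerDe readSerde = new OrcSerde();
SerDeUtils.initializeSerDe(readSerde, conf, readProps, null);
StructObjectInspector rowInspector = (StructObjectInspector) readSerde.getObjectInspector();

// Ask the reader to materialize only column 1, the nested struct "r".
ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));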

Example 2 with SerDe

Use of org.apache.hadoop.hive.serde2.SerDe in the apache/hive project.

From class TestInputOutputFormat, method testRowNumberUniquenessInDifferentSplits.

/**
   * Also see {@link TestOrcFile#testPredicatePushdown()}.
   * This tests that {@link RecordReader#getRowNumber()} works with multiple splits.
   * @throws Exception
   */
@Test
public void testRowNumberUniquenessInDifferentSplits() throws Exception {
    Properties properties = new Properties();
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // Save the conf variable values so that they can be restored later.
    long oldDefaultStripeSize = conf.getLong(OrcConf.STRIPE_SIZE.getHiveConfName(), -1L);
    long oldMaxSplitSize = conf.getLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, -1L);
    // Set the conf variable values for this test.
    // 10000 bytes per stripe
    long newStripeSize = 10000L;
    // 100 bytes per split
    long newMaxSplitSize = 100L;
    conf.setLong(OrcConf.STRIPE_SIZE.getHiveConfName(), newStripeSize);
    conf.setLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, newMaxSplitSize);
    AbstractSerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true, properties, Reporter.NULL);
    // The following loop should create 20 stripes in the orc file.
    for (int i = 0; i < newStripeSize * 10; ++i) {
        writer.write(serde.serialize(new MyRow(i, i + 1), inspector));
    }
    writer.close(true);
    serde = new OrcSerde();
    SerDeUtils.initializeSerDe(serde, conf, properties, null);
    assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<x:int,y:int>", inspector.getTypeName());
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    int numExpectedSplits = 20;
    InputSplit[] splits = in.getSplits(conf, numExpectedSplits);
    assertEquals(numExpectedSplits, splits.length);
    for (int i = 0; i < numExpectedSplits; ++i) {
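        // Read each split directly with the ORC Reader, restricted to the split's byte range, and check row numbers.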
        OrcSplit split = (OrcSplit) splits[i];
        Reader.Options orcReaderOptions = new Reader.Options();
        orcReaderOptions.range(split.getStart(), split.getLength());
        OrcFile.ReaderOptions qlReaderOptions = OrcFile.readerOptions(conf).maxLength(split.getFileLength());
        Reader reader = OrcFile.createReader(split.getPath(), qlReaderOptions);
        RecordReader recordReader = reader.rowsOptions(orcReaderOptions);
        for (int j = 0; recordReader.hasNext(); j++) {
            long rowNum = (i * 5000) + j;
            long rowNumActual = recordReader.getRowNumber();
            assertEquals("rowNum=" + rowNum, rowNum, rowNumActual);
            Object row = recordReader.next(null);
        }
        recordReader.close();
    }
    // Reset the conf variable values that we changed for this test.
    if (oldDefaultStripeSize != -1L) {
        conf.setLong(OrcConf.STRIPE_SIZE.getHiveConfName(), oldDefaultStripeSize);
    } else {
        // this means that nothing was set for default stripe size previously, so we should unset it.
        conf.unset(OrcConf.STRIPE_SIZE.getHiveConfName());
    }
    if (oldMaxSplitSize != -1L) {
        conf.setLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, oldMaxSplitSize);
    } else {
        // this means that nothing was set for the max split size previously, so we should unset it.
        conf.unset(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname);
    }
}
Also used: Properties (java.util.Properties), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), InputSplit (org.apache.hadoop.mapred.InputSplit), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
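
The bookkeeping at the start and end of this test is a common save/override/restore pattern for Configuration values. Below is a condensed sketch of that pattern for a single key; the restore sits in a finally block so it also runs when the body throws, whereas the test above restores inline at the end of the method. The key name and override value are taken from the test; everything else is illustrative:

String key = OrcConf.STRIPE_SIZE.getHiveConfName();
long previous = conf.getLong(key, -1L);
conf.setLong(key, 10000L);
try {
    // ... code that depends on the overridden stripe size ...
} finally {
    if (previous != -1L) {
        conf.setLong(key, previous);
    } else {
        // Nothing was set before, so remove the override entirely.
        conf.unset(key);
    }
}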

Example 3 with SerDe

Use of org.apache.hadoop.hive.serde2.SerDe in the apache/hive project.

From class TestRCFile, method setup.

@Before
public void setup() throws Exception {
    conf = new Configuration();
    ColumnProjectionUtils.setReadAllColumns(conf);
    fs = FileSystem.getLocal(conf);
    dir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred");
    file = new Path(dir, "test_rcfile");
    cleanup();
    // the SerDe part is from TestLazySimpleSerDe
    serDe = new ColumnarSerDe();
    // Create the table properties and initialize the SerDe
    tbl = createProperties();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
    try {
        bytesArray = new byte[][] { "123".getBytes("UTF-8"), "456".getBytes("UTF-8"), "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"), new byte[0], "NULL".getBytes("UTF-8") };
        s = new BytesRefArrayWritable(bytesArray.length);
        s.set(0, new BytesRefWritable("123".getBytes("UTF-8")));
        s.set(1, new BytesRefWritable("456".getBytes("UTF-8")));
        s.set(2, new BytesRefWritable("789".getBytes("UTF-8")));
        s.set(3, new BytesRefWritable("1000".getBytes("UTF-8")));
        s.set(4, new BytesRefWritable("5.3".getBytes("UTF-8")));
        s.set(5, new BytesRefWritable("hive and hadoop".getBytes("UTF-8")));
        s.set(6, new BytesRefWritable("NULL".getBytes("UTF-8")));
        s.set(7, new BytesRefWritable("NULL".getBytes("UTF-8")));
        // partial test init
        patialS.set(0, new BytesRefWritable("NULL".getBytes("UTF-8")));
        patialS.set(1, new BytesRefWritable("NULL".getBytes("UTF-8")));
        patialS.set(2, new BytesRefWritable("789".getBytes("UTF-8")));
        patialS.set(3, new BytesRefWritable("1000".getBytes("UTF-8")));
        patialS.set(4, new BytesRefWritable("NULL".getBytes("UTF-8")));
        // LazyString has no NULL sequence; an empty string is used instead.
        patialS.set(5, new BytesRefWritable("".getBytes("UTF-8")));
        patialS.set(6, new BytesRefWritable("NULL".getBytes("UTF-8")));
        // LazyString has no NULL sequence; an empty string is used instead.
        patialS.set(7, new BytesRefWritable("".getBytes("UTF-8")));
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException(e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe), Configuration (org.apache.hadoop.conf.Configuration), BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), UnsupportedEncodingException (java.io.UnsupportedEncodingException), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable), Before (org.junit.Before)
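
createProperties() is defined elsewhere in TestRCFile and is not shown here. For orientation, the table descriptor handed to SerDeUtils.initializeSerDe typically carries at least the column names and types; the snippet below is a hypothetical example of that shape, not the actual helper (the real test declares eight columns to match the eight values in bytesArray):

Properties tbl = new Properties();
// Illustrative schema only; the real test uses its own column names and types.
tbl.setProperty("columns", "a,b,c");
tbl.setProperty("columns.types", "int:double:string");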

Example 4 with SerDe

Use of org.apache.hadoop.hive.serde2.SerDe in the apache/hive project.

From class TestStatsSerde, method testColumnarSerDe.

/**
   * Test ColumnarSerDe
   */
public void testColumnarSerDe() throws Throwable {
    try {
        System.out.println("test: testColumnarSerde");
        // Create the SerDe
        ColumnarSerDe serDe = new ColumnarSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
        // Data
        BytesRefArrayWritable braw = new BytesRefArrayWritable(8);
        String[] data = { "123", "456", "789", "1000", "5.3", "hive and hadoop", "1.", "NULL" };
        for (int i = 0; i < 8; i++) {
            braw.set(i, new BytesRefWritable(data[i].getBytes()));
        }
        // Test
        deserializeAndSerializeColumnar(serDe, braw, data);
        System.out.println("test: testColumnarSerde - OK");
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used: ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe), Configuration (org.apache.hadoop.conf.Configuration), BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), Properties (java.util.Properties), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
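
deserializeAndSerializeColumnar(serDe, braw, data) is a private helper of TestStatsSerde and is not reproduced here. Its general shape is the same round trip shown in Example 5: deserialize the writable, then serialize the resulting row again with the SerDe's own ObjectInspector. A hypothetical sketch of that shape, with illustrative variable names:

Object row = serDe.deserialize(braw);
StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
Writable reserialized = serDe.serialize(row, oi);
// A statistics-oriented check would then compare serDe.getSerDeStats()
// against the expected raw data size of the strings in data.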

Example 5 with SerDe

Use of org.apache.hadoop.hive.serde2.SerDe in the apache/hive project.

From class TestLazyBinarySerDe, method testLazyBinarySerDe.

/**
   * Test the LazyBinarySerDe.
   *
   * @param rows
   *          array of structs to be serialized
   * @param rowOI
   *          array of struct object inspectors
   * @param serde
   *          the serde
   * @throws Throwable
   */
private void testLazyBinarySerDe(Object[] rows, ObjectInspector rowOI, AbstractSerDe serde) throws Throwable {
    ObjectInspector serdeOI = serde.getObjectInspector();
    // Try to serialize
    BytesWritable[] bytes = new BytesWritable[rows.length];
    for (int i = 0; i < rows.length; i++) {
        BytesWritable s = (BytesWritable) serde.serialize(rows[i], rowOI);
        bytes[i] = new BytesWritable();
        bytes[i].set(s);
    }
    // Try to deserialize
    Object[] deserialized = new Object[rows.length];
    for (int i = 0; i < rows.length; i++) {
        deserialized[i] = serde.deserialize(bytes[i]);
        if (0 != ObjectInspectorUtils.compare(rows[i], rowOI, deserialized[i], serdeOI)) {
            System.out.println("structs[" + i + "] = " + SerDeUtils.getJSONString(rows[i], rowOI));
            System.out.println("deserialized[" + i + "] = " + SerDeUtils.getJSONString(deserialized[i], serdeOI));
            System.out.println("serialized[" + i + "] = " + TestBinarySortableSerDe.hexString(bytes[i]));
            assertEquals(rows[i], deserialized[i]);
        }
    }
}
Also used: AbstractPrimitiveLazyObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector), LazyBinaryMapObjectInspector (org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector), WritableBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector), JavaBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), BytesWritable (org.apache.hadoop.io.BytesWritable)
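
This method is a private helper; callers build the rows, a matching ObjectInspector, and an initialized SerDe first. A sketch of a possible call site follows, with a hypothetical row class and illustrative field names rather than the data generator the real test uses:

// Hypothetical row class for illustration only.
static class SimpleRow {
    int f1;
    String f2;
    SimpleRow(int f1, String f2) { this.f1 = f1; this.f2 = f2; }
}

Properties schema = new Properties();
schema.setProperty("columns", "f1,f2");
schema.setProperty("columns.types", "int:string");
AbstractSerDe serde = new LazyBinarySerDe();
SerDeUtils.initializeSerDe(serde, new Configuration(), schema, null);

ObjectInspector rowOI = ObjectInspectorFactory.getReflectionObjectInspector(SimpleRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
Object[] rows = new Object[] { new SimpleRow(1, "a"), new SimpleRow(2, "b") };
testLazyBinarySerDe(rows, rowOI, serde);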

Aggregations

Classes most often used together with SerDe in these examples, with occurrence counts:

Properties (java.util.Properties): 78
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 69
ArrayList (java.util.ArrayList): 56
Configuration (org.apache.hadoop.conf.Configuration): 56
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 49
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 35
Text (org.apache.hadoop.io.Text): 35
BytesWritable (org.apache.hadoop.io.BytesWritable): 26
AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe): 25
Test (org.junit.Test): 21
List (java.util.List): 20
Put (org.apache.hadoop.hbase.client.Put): 19
LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe): 18
Path (org.apache.hadoop.fs.Path): 15
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 15
Writable (org.apache.hadoop.io.Writable): 15
IOException (java.io.IOException): 14
KeyValue (org.apache.hadoop.hbase.KeyValue): 14
Result (org.apache.hadoop.hbase.client.Result): 14
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 14