Example 31 with AbstractSerDe

use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

the class TestLazyBinarySerDe method testLongerSchemaDeserialization.

/**
   * Test longer-schema deserialization, where a smaller struct is serialized
   * and then deserialized with a bigger struct. Here the serialized struct has
   * 9 fields and we deserialize it into a struct of 10 fields.
   */
void testLongerSchemaDeserialization(Random r) throws Throwable {
    StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
    AbstractSerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    // Result unused: deserialization below is checked against the bigger schema's inspector (serdeOI2).
    serde1.getObjectInspector();
    StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA);
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
    AbstractSerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();
    int num = 100;
    for (int itest = 0; itest < num; itest++) {
        MyTestClass t = new MyTestClass();
        ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
        t.randomFill(r, extraTypeInfo);
        BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1);
        Object output = serde2.deserialize(bw);
        if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) {
            System.out.println("structs      = " + SerDeUtils.getJSONString(t, rowOI1));
            System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
            System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));
            assertEquals(t, output);
        }
    }
}
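
Both this example and the next call a getSerDe(fieldNames, fieldTypes) helper that the snippet does not include, and the comparison above relies on compareDiffSizedStructs, which presumably compares the fields the two schemas share. A minimal sketch of what such a getSerDe helper could look like, assuming it builds a LazyBinarySerDe from the standard columns/columns.types properties (the property constants and the initializeSerDe call are real Hive API; the body is our assumption, not the test's verbatim code):

static AbstractSerDe getSerDe(String fieldNames, String fieldTypes) throws Throwable {
    // Describe the schema through the standard serde table properties.
    Properties schema = new Properties();
    schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);      // "columns"
    schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes); // "columns.types"
    // Sketch: assume the tests exercise LazyBinarySerDe; the real helper may differ.
    AbstractSerDe serde = new LazyBinarySerDe();
    SerDeUtils.initializeSerDe(serde, new Configuration(), schema, null);
    return serde;
}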
Also used : AbstractPrimitiveLazyObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector) LazyBinaryMapObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) JavaBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MyTestClass(org.apache.hadoop.hive.serde2.binarysortable.MyTestClass) ExtraTypeInfo(org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo) BytesWritable(org.apache.hadoop.io.BytesWritable) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe)

Example 32 with AbstractSerDe

use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

the class TestLazyBinarySerDe method testLazyBinaryMap.

void testLazyBinaryMap(Random r) throws Throwable {
    StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA);
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
    AbstractSerDe serde = getSerDe(fieldNames, fieldTypes);
    ObjectInspector serdeOI = serde.getObjectInspector();
    StructObjectInspector soi1 = (StructObjectInspector) serdeOI;
    List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
    LazyBinaryMapObjectInspector lazympoi = (LazyBinaryMapObjectInspector) fields1.get(MyTestClassBigger.mapPos).getFieldObjectInspector();
    ObjectInspector lazympkeyoi = lazympoi.getMapKeyObjectInspector();
    ObjectInspector lazympvalueoi = lazympoi.getMapValueObjectInspector();
    StructObjectInspector soi2 = rowOI;
    List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();
    MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(MyTestClassBigger.mapPos).getFieldObjectInspector();
    ObjectInspector inputmpkeyoi = inputmpoi.getMapKeyObjectInspector();
    ObjectInspector inputmpvalueoi = inputmpoi.getMapValueObjectInspector();
    int num = 100;
    for (int testi = 0; testi < num; testi++) {
        Map<String, List<MyTestInnerStruct>> mp = new LinkedHashMap<String, List<MyTestInnerStruct>>();
        int randFields = r.nextInt(10);
        for (int i = 0; i < randFields; i++) {
            String key = MyTestPrimitiveClass.getRandString(r);
            int randField = r.nextInt(10);
            List<MyTestInnerStruct> value = randField > 4 ? null : getRandStructArray(r);
            mp.put(key, value);
        }
        MyTestClassBigger t = new MyTestClassBigger();
        t.myMap = mp;
        BytesWritable bw = (BytesWritable) serde.serialize(t, rowOI);
        Object output = serde.deserialize(bw);
        Object lazyobj = soi1.getStructFieldData(output, fields1.get(MyTestClassBigger.mapPos));
        Map<?, ?> outputmp = lazympoi.getMap(lazyobj);
        if (outputmp.size() != mp.size()) {
            throw new RuntimeException("Map size changed from " + mp.size() + " to " + outputmp.size() + " after serialization!");
        }
        for (Map.Entry<?, ?> entryinput : mp.entrySet()) {
            boolean bEqual = false;
            for (Map.Entry<?, ?> entryoutput : outputmp.entrySet()) {
                // find the same key
                if (0 == ObjectInspectorUtils.compare(entryoutput.getKey(), lazympkeyoi, entryinput.getKey(), inputmpkeyoi)) {
                    if (0 != ObjectInspectorUtils.compare(entryoutput.getValue(), lazympvalueoi, entryinput.getValue(), inputmpvalueoi)) {
                        assertEquals(entryoutput.getValue(), entryinput.getValue());
                    } else {
                        bEqual = true;
                    }
                    break;
                }
            }
            if (!bEqual) {
                throw new RuntimeException("Could not find matched key in deserialized map : " + entryinput.getKey());
            }
        }
    }
}
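
The inner loop above locates a matching key by ObjectInspector-aware comparison, because the deserialized map's lazy keys cannot be compared to the input map's plain Java keys with equals(). The lookup can be factored into a small helper; a sketch with names of our choosing, not taken from the test:

/** Find the entry of the deserialized map whose key equals the given input key, or null. */
static Map.Entry<?, ?> findMatchingEntry(Map<?, ?> outputMap, ObjectInspector outputKeyOI,
                                         Object inputKey, ObjectInspector inputKeyOI) {
    for (Map.Entry<?, ?> e : outputMap.entrySet()) {
        // ObjectInspectorUtils.compare handles mixed lazy/standard representations.
        if (0 == ObjectInspectorUtils.compare(e.getKey(), outputKeyOI, inputKey, inputKeyOI)) {
            return e;
        }
    }
    return null;
}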
Also used : AbstractPrimitiveLazyObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector) LazyBinaryMapObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) JavaBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MyTestInnerStruct(org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct) BytesWritable(org.apache.hadoop.io.BytesWritable) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 33 with AbstractSerDe

use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

the class TestInputOutputFormat method testDefaultTypes.

@Test
public void testDefaultTypes() throws Exception {
    Properties properties = new Properties();
    properties.setProperty("columns", "str,str2");
    properties.setProperty("columns.types", "string:string");
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    AbstractSerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class, true, properties, Reporter.NULL);
    writer.write(serde.serialize(new StringRow("owen"), inspector));
    writer.write(serde.serialize(new StringRow("beth"), inspector));
    writer.write(serde.serialize(new StringRow("laurel"), inspector));
    writer.write(serde.serialize(new StringRow("hazen"), inspector));
    writer.write(serde.serialize(new StringRow("colin"), inspector));
    writer.write(serde.serialize(new StringRow("miles"), inspector));
    writer.close(true);
    serde = new OrcSerde();
    SerDeUtils.initializeSerDe(serde, conf, properties, null);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    // read the whole file
    conf.set("columns", StringRow.getColumnNamesProperty());
    conf.set("columns.types", StringRow.getColumnTypesProperty());
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Writable value = (Writable) reader.createValue();
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StringObjectInspector strInspector = (StringObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(true, reader.next(key, value));
    assertEquals("owen", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("beth", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("laurel", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("hazen", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("colin", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("miles", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(false, reader.next(key, value));
    reader.close();
}
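
StringRow is a fixture class defined inside the test and not shown in this snippet. A plausible minimal sketch, assuming two string fields matching the columns property (the real fixture may differ; the Writable methods are typically unimplemented because rows are written through the serde, not through Writable):

static class StringRow implements Writable {
    String str;
    String str2;
    StringRow(String s) {
        str = s;
        str2 = s;
    }
    static String getColumnNamesProperty() { return "str,str2"; }
    static String getColumnTypesProperty() { return "string:string"; }
    // ORC writes rows via the serde, so Writable serialization is never invoked.
    public void write(DataOutput out) { throw new UnsupportedOperationException("no write"); }
    public void readFields(DataInput in) { throw new UnsupportedOperationException("no read"); }
}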
Also used : NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) IntWritable(org.apache.hadoop.io.IntWritable) Properties(java.util.Properties) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) InputSplit(org.apache.hadoop.mapred.InputSplit) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 34 with AbstractSerDe

use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

the class TestInputOutputFormat method testInOutFormat.

@Test
public void testInOutFormat() throws Exception {
    Properties properties = new Properties();
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    AbstractSerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true, properties, Reporter.NULL);
    writer.write(serde.serialize(new MyRow(1, 2), inspector));
    writer.write(serde.serialize(new MyRow(2, 2), inspector));
    writer.write(serde.serialize(new MyRow(3, 2), inspector));
    writer.close(true);
    serde = new OrcSerde();
    SerDeUtils.initializeSerDe(serde, conf, properties, null);
    assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<x:int,y:int>", inspector.getTypeName());
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    // test the validateInput method
    ArrayList<FileStatus> fileList = new ArrayList<FileStatus>();
    assertEquals(false, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(testFilePath));
    assertEquals(true, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(workDir));
    assertEquals(false, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    // read the whole file
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Writable value = (Writable) reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    IntObjectInspector intInspector = (IntObjectInspector) fields.get(0).getFieldObjectInspector();
    while (reader.next(key, value)) {
        assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(0))));
        assertEquals(2, intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    assertEquals(1.0, reader.getProgress(), 0.00001);
    reader.close();
    // read just the first column
    ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(0));
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
        assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(value, fields.get(0))));
        assertEquals(null, inspector.getStructFieldData(value, fields.get(1)));
    }
    assertEquals(3, rowNum);
    reader.close();
    // test the mapping of empty string to all columns
    ColumnProjectionUtils.setReadAllColumns(conf);
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
        assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(value, fields.get(0))));
        assertEquals(2, intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    reader.close();
}
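
MyRow, like StringRow above, is an assumed test fixture: two int fields x and y matching the columns/columns.types properties. A sketch on the same pattern (names match the snippet; the body is our assumption):

static class MyRow implements Writable {
    int x;
    int y;
    MyRow(int x, int y) {
        this.x = x;
        this.y = y;
    }
    static String getColumnNamesProperty() { return "x,y"; }
    static String getColumnTypesProperty() { return "int:int"; }
    public void write(DataOutput out) { throw new UnsupportedOperationException("no write"); }
    public void readFields(DataInput in) { throw new UnsupportedOperationException("no read"); }
}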
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) ArrayList(java.util.ArrayList) NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) IntWritable(org.apache.hadoop.io.IntWritable) Properties(java.util.Properties) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) HiveConf(org.apache.hadoop.hive.conf.HiveConf) InputSplit(org.apache.hadoop.mapred.InputSplit) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 35 with AbstractSerDe

use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

the class TestInputOutputFormat method testSplitElimination.

@Test
public void testSplitElimination() throws Exception {
    Properties properties = new Properties();
    properties.setProperty("columns", "z,r");
    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    AbstractSerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    conf.setInt("mapred.max.split.size", 50);
    RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    // z takes only the values 1, 4, and 7 above, so z < 0 can never match;
    // ORC's column statistics let getSplits eliminate every split.
    SearchArgument sarg = SearchArgumentFactory.newBuilder().startAnd().lessThan("z", PredicateLeaf.Type.LONG, 0L).end().build();
    conf.set("sarg.pushdown", toKryo(sarg));
    conf.set("hive.io.file.readcolumn.names", "z,r");
    SerDeUtils.initializeSerDe(serde, conf, properties, null);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(0, splits.length);
}
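
For contrast, a predicate the stripe statistics cannot rule out should leave splits intact. A hedged continuation of the same test (the threshold 10 is our choice, not from the source; z takes the values 1, 4, and 7 above, so the reader cannot prove z < 10 never matches):

    SearchArgument keep = SearchArgumentFactory.newBuilder()
        .startAnd().lessThan("z", PredicateLeaf.Type.LONG, 10L).end().build();
    conf.set("sarg.pushdown", toKryo(keep));
    InputSplit[] kept = in.getSplits(conf, 1);
    // At least one split must survive, since rows with z in {1, 4, 7} may satisfy z < 10.
    assertTrue(kept.length > 0);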
Also used : SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) Properties(java.util.Properties) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) RecordWriter(org.apache.hadoop.mapred.RecordWriter) InputSplit(org.apache.hadoop.mapred.InputSplit) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Aggregations

AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe): 40 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 22 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 16 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 15 usages
Properties (java.util.Properties): 12 usages
ArrayList (java.util.ArrayList): 10 usages
BytesWritable (org.apache.hadoop.io.BytesWritable): 9 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8 usages
Writable (org.apache.hadoop.io.Writable): 8 usages
IOException (java.io.IOException): 7 usages
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 7 usages
InputSplit (org.apache.hadoop.mapred.InputSplit): 7 usages
Test (org.junit.Test): 7 usages
AbstractPrimitiveLazyObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector): 6 usages
LazyBinaryMapObjectInspector (org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector): 6 usages
JavaBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector): 6 usages
WritableBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector): 6 usages
Path (org.apache.hadoop.fs.Path): 5 usages
RecordWriter (org.apache.hadoop.mapred.RecordWriter): 5 usages
ConcurrentModificationException (java.util.ConcurrentModificationException): 4 usages