Search in sources :

Example 71 with AbstractSerDe

use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

the class TestLazyBinaryFast method testLazyBinaryFastCase.

public void testLazyBinaryFastCase(int caseNum, boolean doNonRandomFill, Random r, SerdeRandomRowSource.SupportedTypes supportedTypes, int depth) throws Throwable {
    SerdeRandomRowSource source = new SerdeRandomRowSource();
    source.init(r, supportedTypes, depth);
    int rowCount = 100;
    Object[][] rows = source.randomRows(rowCount);
    if (doNonRandomFill) {
        MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
    }
    StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
    TypeInfo[] typeInfos = source.typeInfos();
    int columnCount = typeInfos.length;
    int writeColumnCount = columnCount;
    StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
    boolean doWriteFewerColumns = r.nextBoolean();
    if (doWriteFewerColumns) {
        writeColumnCount = 1 + r.nextInt(columnCount);
        if (writeColumnCount == columnCount) {
            doWriteFewerColumns = false;
        } else {
            writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
        }
    }
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
    TestLazyBinarySerDe testLazyBinarySerDe = new TestLazyBinarySerDe();
    AbstractSerDe serde = testLazyBinarySerDe.getSerDe(fieldNames, fieldTypes);
    AbstractSerDe serde_fewer = null;
    if (doWriteFewerColumns) {
        String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
        String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
        serde_fewer = testLazyBinarySerDe.getSerDe(partialFieldNames, partialFieldTypes);
        ;
    }
    testLazyBinaryFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, typeInfos, /* useIncludeColumns */
    false, /* doWriteFewerColumns */
    false, r);
    testLazyBinaryFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, typeInfos, /* useIncludeColumns */
    true, /* doWriteFewerColumns */
    false, r);
/*
     * Can the LazyBinary format really tolerate writing fewer columns?
     */
// if (doWriteFewerColumns) {
// testLazyBinaryFast(
// source, rows,
// serde, rowStructObjectInspector,
// serde_fewer, writeRowStructObjectInspector,
// primitiveTypeInfos,
// /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);
// testLazyBinaryFast(
// source, rows,
// serde, rowStructObjectInspector,
// serde_fewer, writeRowStructObjectInspector,
// primitiveTypeInfos,
// /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
// }
}
Also used : SerdeRandomRowSource(org.apache.hadoop.hive.serde2.SerdeRandomRowSource) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 72 with AbstractSerDe

use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

the class TestLazyBinarySerDe method testLongerSchemaDeserialization.

/**
 * Test longer schema deserialization where a smaller struct is serialized and
 * it is then deserialized with a bigger struct Here the serialized struct has
 * 9 fields and we deserialized to a struct of 10 fields.
 */
void testLongerSchemaDeserialization(Random r) throws Throwable {
    StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
    AbstractSerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    serde1.getObjectInspector();
    StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA);
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
    AbstractSerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();
    int num = 100;
    for (int itest = 0; itest < num; itest++) {
        MyTestClass t = new MyTestClass();
        ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
        t.randomFill(r, extraTypeInfo);
        BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1);
        Object output = serde2.deserialize(bw);
        if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) {
            System.out.println("structs      = " + SerDeUtils.getJSONString(t, rowOI1));
            System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
            System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));
            assertEquals(t, output);
        }
    }
}
Also used : AbstractPrimitiveLazyObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector) LazyBinaryMapObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) JavaBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MyTestClass(org.apache.hadoop.hive.serde2.binarysortable.MyTestClass) ExtraTypeInfo(org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo) BytesWritable(org.apache.hadoop.io.BytesWritable) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 73 with AbstractSerDe

use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

the class TestLazyBinarySerDe method testLazyBinaryMap.

void testLazyBinaryMap(Random r) throws Throwable {
    StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA);
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
    AbstractSerDe serde = getSerDe(fieldNames, fieldTypes);
    ObjectInspector serdeOI = serde.getObjectInspector();
    StructObjectInspector soi1 = (StructObjectInspector) serdeOI;
    List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
    LazyBinaryMapObjectInspector lazympoi = (LazyBinaryMapObjectInspector) fields1.get(MyTestClassBigger.mapPos).getFieldObjectInspector();
    ObjectInspector lazympkeyoi = lazympoi.getMapKeyObjectInspector();
    ObjectInspector lazympvalueoi = lazympoi.getMapValueObjectInspector();
    StructObjectInspector soi2 = rowOI;
    List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();
    MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(MyTestClassBigger.mapPos).getFieldObjectInspector();
    ObjectInspector inputmpkeyoi = inputmpoi.getMapKeyObjectInspector();
    ObjectInspector inputmpvalueoi = inputmpoi.getMapValueObjectInspector();
    int num = 100;
    for (int testi = 0; testi < num; testi++) {
        Map<String, List<MyTestInnerStruct>> mp = new LinkedHashMap<String, List<MyTestInnerStruct>>();
        int randFields = r.nextInt(10);
        for (int i = 0; i < randFields; i++) {
            String key = MyTestPrimitiveClass.getRandString(r);
            int randField = r.nextInt(10);
            List<MyTestInnerStruct> value = randField > 4 ? null : getRandStructArray(r);
            mp.put(key, value);
        }
        MyTestClassBigger t = new MyTestClassBigger();
        t.myMap = mp;
        BytesWritable bw = (BytesWritable) serde.serialize(t, rowOI);
        Object output = serde.deserialize(bw);
        Object lazyobj = soi1.getStructFieldData(output, fields1.get(MyTestClassBigger.mapPos));
        Map<?, ?> outputmp = lazympoi.getMap(lazyobj);
        if (outputmp.size() != mp.size()) {
            throw new RuntimeException("Map size changed from " + mp.size() + " to " + outputmp.size() + " after serialization!");
        }
        for (Map.Entry<?, ?> entryinput : mp.entrySet()) {
            boolean bEqual = false;
            for (Map.Entry<?, ?> entryoutput : outputmp.entrySet()) {
                // find the same key
                if (0 == ObjectInspectorUtils.compare(entryoutput.getKey(), lazympkeyoi, entryinput.getKey(), inputmpkeyoi)) {
                    if (0 != ObjectInspectorUtils.compare(entryoutput.getValue(), lazympvalueoi, entryinput.getValue(), inputmpvalueoi)) {
                        assertEquals(entryoutput.getValue(), entryinput.getValue());
                    } else {
                        bEqual = true;
                    }
                    break;
                }
            }
            if (!bEqual) {
                throw new RuntimeException("Could not find matched key in deserialized map : " + entryinput.getKey());
            }
        }
    }
}
Also used : AbstractPrimitiveLazyObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector) LazyBinaryMapObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) JavaBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MyTestInnerStruct(org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct) LazyBinaryMapObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) LinkedHashMap(java.util.LinkedHashMap) LazyBinaryMapObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 74 with AbstractSerDe

use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

the class TestLazyBinarySerDe method testShorterSchemaDeserialization.

/**
 * Test shorter schema deserialization where a bigger struct is serialized and
 * it is then deserialized with a smaller struct. Here the serialized struct
 * has 10 fields and we deserialized to a struct of 9 fields.
 */
private void testShorterSchemaDeserialization(Random r) throws Throwable {
    StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA);
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
    AbstractSerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    serde1.getObjectInspector();
    StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
    AbstractSerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();
    int num = 100;
    for (int itest = 0; itest < num; itest++) {
        MyTestClassBigger t = new MyTestClassBigger();
        ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
        t.randomFill(r, extraTypeInfo);
        BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1);
        Object output = serde2.deserialize(bw);
        if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) {
            System.out.println("structs      = " + SerDeUtils.getJSONString(t, rowOI1));
            System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
            System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));
            assertEquals(t, output);
        }
    }
}
Also used : AbstractPrimitiveLazyObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector) LazyBinaryMapObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) JavaBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MyTestClass(org.apache.hadoop.hive.serde2.binarysortable.MyTestClass) ExtraTypeInfo(org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo) BytesWritable(org.apache.hadoop.io.BytesWritable) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 75 with AbstractSerDe

use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

the class AbstractRecordWriter method init.

@Override
public void init(StreamingConnection conn, long minWriteId, long maxWriteId, int statementId) throws StreamingException {
    if (conn == null) {
        throw new StreamingException("Streaming connection cannot be null during record writer initialization");
    }
    this.conn = conn;
    this.curBatchMinWriteId = minWriteId;
    this.curBatchMaxWriteId = maxWriteId;
    this.statementId = statementId;
    this.conf = conn.getHiveConf();
    this.defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
    this.table = conn.getTable();
    String location = table.getSd().getLocation();
    try {
        URI uri = new URI(location);
        this.fs = FileSystem.newInstance(uri, conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Created new filesystem instance: {}", System.identityHashCode(this.fs));
        }
    } catch (URISyntaxException e) {
        throw new StreamingException("Unable to create URI from location: " + location, e);
    } catch (IOException e) {
        throw new StreamingException("Unable to get filesystem for location: " + location, e);
    }
    this.inputColumns = table.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
    this.inputTypes = table.getSd().getCols().stream().map(FieldSchema::getType).collect(Collectors.toList());
    if (conn.isPartitionedTable() && conn.isDynamicPartitioning()) {
        this.partitionColumns = table.getPartitionKeys().stream().map(FieldSchema::getName).collect(Collectors.toList());
        this.inputColumns.addAll(partitionColumns);
        this.inputTypes.addAll(table.getPartitionKeys().stream().map(FieldSchema::getType).collect(Collectors.toList()));
    }
    this.fullyQualifiedTableName = Warehouse.getQualifiedName(table.getDbName(), table.getTableName());
    String outFormatName = this.table.getSd().getOutputFormat();
    try {
        this.acidOutputFormat = (AcidOutputFormat<?, ?>) ReflectionUtils.newInstance(JavaUtils.loadClass(outFormatName), conf);
    } catch (Exception e) {
        String shadePrefix = conf.getVar(HiveConf.ConfVars.HIVE_CLASSLOADER_SHADE_PREFIX);
        if (shadePrefix != null && !shadePrefix.trim().isEmpty()) {
            try {
                LOG.info("Shade prefix: {} specified. Using as fallback to load {}..", shadePrefix, outFormatName);
                this.acidOutputFormat = (AcidOutputFormat<?, ?>) ReflectionUtils.newInstance(JavaUtils.loadClass(shadePrefix, outFormatName), conf);
            } catch (ClassNotFoundException e1) {
                throw new StreamingException(e.getMessage(), e);
            }
        } else {
            throw new StreamingException(e.getMessage(), e);
        }
    }
    setupMemoryMonitoring();
    try {
        final AbstractSerDe serDe = createSerde();
        this.inputRowObjectInspector = (StructObjectInspector) serDe.getObjectInspector();
        if (conn.isPartitionedTable() && conn.isDynamicPartitioning()) {
            preparePartitioningFields();
            int dpStartCol = inputRowObjectInspector.getAllStructFieldRefs().size() - table.getPartitionKeys().size();
            this.outputRowObjectInspector = new SubStructObjectInspector(inputRowObjectInspector, 0, dpStartCol);
        } else {
            this.outputRowObjectInspector = inputRowObjectInspector;
        }
        prepareBucketingFields();
    } catch (SerDeException e) {
        throw new StreamingException("Unable to create SerDe", e);
    }
}
Also used : FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) URI(java.net.URI) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) SubStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SubStructObjectInspector) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Aggregations

AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe)61 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)31 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)26 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)23 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)19 ArrayList (java.util.ArrayList)18 Properties (java.util.Properties)16 IOException (java.io.IOException)15 BytesWritable (org.apache.hadoop.io.BytesWritable)12 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)9 Writable (org.apache.hadoop.io.Writable)8 Test (org.junit.Test)8 Path (org.apache.hadoop.fs.Path)7 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)7 AbstractPrimitiveLazyObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector)6 LazyBinaryMapObjectInspector (org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector)6 JavaBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector)6 WritableBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector)6 LinkedHashMap (java.util.LinkedHashMap)5 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)5