Search in sources :

Example 6 with LazyBinaryDeserializeRead

use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.

the class ReduceRecordSource method init.

/**
 * Initializes this record source: stores the supplied configuration in fields, wraps the
 * reader, instantiates and initializes the key and value deserializers from their table
 * descriptors, and builds the row object inspector.
 *
 * <p>When {@code vectorized} is true the key and value object inspectors are cast to
 * {@link StructObjectInspector}, the row batch is created from {@code batchContext}, and
 * vectorized deserializers for the key and value are set up. Otherwise a standard struct
 * inspector over the key and value inspectors is built.
 *
 * @param jconf job configuration (not referenced in this method)
 * @param reducer operator stored in {@code this.reducer}
 * @param vectorized whether to take the vectorized setup path
 * @param keyTableDesc descriptor supplying the key deserializer class and properties
 * @param valueTableDesc descriptor supplying the value deserializer class and properties
 * @param reader a {@code KeyValueReader} or {@code KeyValuesReader}; any other type fails the cast
 * @param handleGroupKey stored in {@code this.handleGroupKey}
 * @param tag stored in {@code this.tag}
 * @param batchContext used to create the row batch on the vectorized path
 * @param vectorizedVertexNum stored in {@code this.vectorizedVertexNum}
 * @throws RuntimeException wrapping any non-OOM failure raised while setting up the deserializers
 * @throws OutOfMemoryError rethrown unwrapped if raised while setting up the deserializers
 */
void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, VectorizedRowBatchCtx batchContext, long vectorizedVertexNum) throws Exception {
    this.reducer = reducer;
    this.vectorized = vectorized;
    this.keyTableDesc = keyTableDesc;
    this.vectorizedVertexNum = vectorizedVertexNum;
    // Both reader flavours are wrapped so the rest of the class sees a single reader shape.
    this.reader = (reader instanceof KeyValueReader)
            ? new KeyValuesFromKeyValue((KeyValueReader) reader)
            : new KeyValuesFromKeyValues((KeyValuesReader) reader);
    this.handleGroupKey = handleGroupKey;
    this.tag = tag;
    try {
        // --- Key side ---
        inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
        final ObjectInspector keyInspector = inputKeyDeserializer.getObjectInspector();
        if (vectorized) {
            keyStructInspector = (StructObjectInspector) keyInspector;
            // Value columns are placed after the key columns.
            firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
        }

        // --- Value side ---
        // We should initialize the SerDe with the TypeInfo when available.
        this.valueTableDesc = valueTableDesc;
        inputValueDeserializer = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
        valueObjectInspector = inputValueDeserializer.getObjectInspector();

        if (!vectorized) {
            // Row-mode: the row is a struct of (key, value).
            ArrayList<ObjectInspector> keyAndValueInspectors = new ArrayList<ObjectInspector>();
            keyAndValueInspectors.add(keyInspector);
            keyAndValueInspectors.add(valueObjectInspector);
            rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, keyAndValueInspectors);
        } else {
            /* vectorization only works with struct object inspectors */
            valueStructInspectors = (StructObjectInspector) valueObjectInspector;
            final int columnTotal = firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();
            valueStringWriters = new ArrayList<VectorExpressionWriter>(columnTotal);
            // Key writers first, then value writers.
            valueStringWriters.addAll(Arrays.asList(VectorExpressionWriterFactory.genVectorStructExpressionWritables(keyStructInspector)));
            valueStringWriters.addAll(Arrays.asList(VectorExpressionWriterFactory.genVectorStructExpressionWritables(valueStructInspectors)));
            rowObjectInspector = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
            batch = batchContext.createVectorizedRowBatch();

            // Setup vectorized deserialization for the key and value.
            final BinarySortableSerDe keySerDe = (BinarySortableSerDe) inputKeyDeserializer;
            final BinarySortableDeserializeRead keyReader = new BinarySortableDeserializeRead(
                    VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
                    /* useExternalBuffer */ true,
                    keySerDe.getSortOrders());
            keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(keyReader);
            keyBinarySortableDeserializeToRow.init(0);

            final int valueFieldCount = valueStructInspectors.getAllStructFieldRefs().size();
            if (valueFieldCount > 0) {
                final LazyBinaryDeserializeRead valueReader = new LazyBinaryDeserializeRead(
                        VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors),
                        /* useExternalBuffer */ true);
                valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(valueReader);
                valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);

                // Create data buffers for value bytes column vectors.
                for (int col = firstValueColumnOffset; col < batch.numCols; col++) {
                    if (batch.cols[col] instanceof BytesColumnVector) {
                        ((BytesColumnVector) batch.cols[col]).initBuffer();
                    }
                }
            }
        }
    } catch (Throwable failure) {
        abort = true;
        if (!(failure instanceof OutOfMemoryError)) {
            throw new RuntimeException("Reduce operator initialization failed", failure);
        }
        // Don't create a new object if we are already out of memory
        throw (OutOfMemoryError) failure;
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) BinarySortableSerDe(org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) ArrayList(java.util.ArrayList) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) KeyValuesReader(org.apache.tez.runtime.library.api.KeyValuesReader) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)

Example 7 with LazyBinaryDeserializeRead

use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.

the class CheckFastRowHashMap method verifyHashMapRowsMore.

/**
 * Walks the values chained in {@code hashMapResult} and checks that each one deserializes,
 * column by column, to the expected row.
 *
 * <p>The value at position {@code a} is compared against {@code rows.get(actualToValueMap[a])}.
 * For the position equal to {@code clipIndex} the value is deliberately shortened by one byte
 * and deserialization is required to fail with an {@link EOFException} or an
 * {@link ArrayIndexOutOfBoundsException}; every other position must deserialize without an
 * exception and consume its input completely.
 *
 * @param rows expected rows; each element is cast to {@code Writable} per column
 * @param actualToValueMap maps the position in the hash map result chain to an index in {@code rows}
 * @param hashMapResult result whose value chain is iterated with {@code first()}/{@code next()}
 * @param typeInfos column types; each is cast to {@code PrimitiveTypeInfo}
 * @param clipIndex position whose value is truncated by one byte (no position is clipped if it
 *        matches none)
 * @param useExactBytes when true, deserialize from a copy holding exactly the value's bytes
 *        instead of from the shared backing array
 * @throws IOException declared by the signature; exceptions raised during the per-column
 *         verification are caught and turned into test failures
 */
public static void verifyHashMapRowsMore(List<Object[]> rows, int[] actualToValueMap, VectorMapJoinHashMapResult hashMapResult, TypeInfo[] typeInfos, int clipIndex, boolean useExactBytes) throws IOException {
    final int count = rows.size();
    final int columnCount = typeInfos.length;
    WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
    for (int a = 0; a < count; a++) {
        final boolean clipped = (a == clipIndex);
        Object[] row = rows.get(actualToValueMap[a]);
        byte[] bytes = ref.getBytes();
        int offset = (int) ref.getOffset();
        int length = ref.getLength();
        if (clipped) {
            // Drop the last byte so that deserialization runs off the end of the value.
            length--;
        }
        if (useExactBytes) {
            // Use exact byte array which might generate array out of bounds...
            bytes = Arrays.copyOfRange(bytes, offset, offset + length);
            offset = 0;
        }
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */
        false);
        lazyBinaryDeserializeRead.set(bytes, offset, length);

        Exception saveException = null;
        String readPosition = null;
        String hashMapResultPosition = null;
        // Declared outside the try so the failing column can be reported.
        int index = 0;
        try {
            for (index = 0; index < columnCount; index++) {
                Writable writable = (Writable) row[index];
                VerifyFastRow.verifyDeserializeRead(lazyBinaryDeserializeRead, (PrimitiveTypeInfo) typeInfos[index], writable);
            }
        } catch (Exception e) {
            saveException = e;
            // Capture the positions at the point of failure, before the result is advanced.
            readPosition = String.valueOf(lazyBinaryDeserializeRead.getDetailedReadPositionString());
            hashMapResultPosition = String.valueOf(hashMapResult.getDetailedHashMapResultPositionString());
        }

        if (clipped) {
            if (saveException == null) {
                TestCase.fail("Expecting an exception to be thrown for the clipped case...");
            } else if (!(saveException instanceof EOFException) && !(saveException instanceof ArrayIndexOutOfBoundsException)) {
                // EOFException is the expected outcome; ArrayIndexOutOfBoundsException is also accepted.
                TestCase.fail("Expecting an EOFException to be thrown for the clipped case...");
            }
        } else {
            if (saveException != null) {
                TestCase.fail("Not expecting an exception to be thrown for the non-clipped case... "
                        + " exception message " + saveException.getMessage()
                        + " row " + a
                        + " column index " + index
                        + " read position " + readPosition
                        + " hash map result position " + hashMapResultPosition
                        + " stack trace " + getStackTraceAsSingleLine(saveException.getStackTrace()));
            }
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }

        // The chain must end exactly after the last expected row.
        ref = hashMapResult.next();
        if (a == count - 1) {
            TestCase.assertTrue(ref == null);
        } else {
            TestCase.assertTrue(ref != null);
        }
    }
}
Also used : ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) WriteBuffers(org.apache.hadoop.hive.serde2.WriteBuffers) IOException(java.io.IOException) EOFException(java.io.EOFException) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)

Aggregations

LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead): 7, StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 4, BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead): 3, LazyBinarySerializeWrite (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite): 3, BytesWritable (org.apache.hadoop.io.BytesWritable): 3, Writable (org.apache.hadoop.io.Writable): 3, IOException (java.io.IOException): 2, Output (org.apache.hadoop.hive.serde2.ByteStream.Output): 2, WriteBuffers (org.apache.hadoop.hive.serde2.WriteBuffers): 2, BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite): 2, DeserializeRead (org.apache.hadoop.hive.serde2.fast.DeserializeRead): 2, SerializeWrite (org.apache.hadoop.hive.serde2.fast.SerializeWrite): 2, ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 2, ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 2, LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters): 2, LazySimpleDeserializeRead (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead): 2, LazySimpleSerializeWrite (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite): 2, PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 2, BooleanWritable (org.apache.hadoop.io.BooleanWritable): 2, IntWritable (org.apache.hadoop.io.IntWritable): 2