Example 11 with LazyBinaryDeserializeRead

use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.

the class TestVectorSerDeRow method innerTestVectorSerializeRow.

void innerTestVectorSerializeRow(Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException {
    String[] emptyScratchTypeNames = new String[0];
    VectorRandomRowSource source = new VectorRandomRowSource();
    // FUTURE: try NULLs and UNICODE.
    source.init(r, VectorRandomRowSource.SupportedTypes.ALL, 4,
        /* allowNulls */ false, /* isUnicodeOk */ false);
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
    batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    VectorAssignRow vectorAssignRow = new VectorAssignRow();
    vectorAssignRow.init(source.typeNames());
    int fieldCount = source.typeNames().size();
    DeserializeRead deserializeRead;
    SerializeWrite serializeWrite;
    switch(serializationType) {
        case BINARY_SORTABLE:
            deserializeRead = BinarySortableDeserializeRead.ascendingNullsFirst(source.typeInfos(), /* useExternalBuffer */ false);
            serializeWrite = new BinarySortableSerializeWrite(fieldCount);
            break;
        case LAZY_BINARY:
            deserializeRead = new LazyBinaryDeserializeRead(source.typeInfos(),
                /* useExternalBuffer */ false);
            serializeWrite = new LazyBinarySerializeWrite(fieldCount);
            break;
        case LAZY_SIMPLE:
            {
                StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
                // Use different separator values.
                byte[] separators = new byte[] { (byte) 9, (byte) 2, (byte) 3, (byte) 4, (byte) 5, (byte) 6, (byte) 7, (byte) 8 };
                LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector, separators);
                deserializeRead = new LazySimpleDeserializeRead(source.typeInfos(),
                    /* useExternalBuffer */ false, lazySerDeParams);
                serializeWrite = new LazySimpleSerializeWrite(fieldCount, lazySerDeParams);
            }
            break;
        default:
            throw new Error("Unknown serialization type " + serializationType);
    }
    VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(serializeWrite);
    vectorSerializeRow.init(source.typeNames());
    Object[][] randomRows = source.randomRows(2000);
    int firstRandomRowIndex = 0;
    for (int i = 0; i < randomRows.length; i++) {
        Object[] row = randomRows[i];
        vectorAssignRow.assignRow(batch, batch.size, row);
        batch.size++;
        if (batch.size == batch.DEFAULT_SIZE) {
            serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
            firstRandomRowIndex = i + 1;
            batch.reset();
        }
    }
    if (batch.size > 0) {
        serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
    }
}
Also used : LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) DeserializeRead(org.apache.hadoop.hive.serde2.fast.DeserializeRead) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) SerializeWrite(org.apache.hadoop.hive.serde2.fast.SerializeWrite) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
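
Note: stripped of the vectorized-batch machinery above, the fast-serde round trip that serializeBatch ultimately verifies looks roughly like the following self-contained sketch. This is not code from the test; the two-column (bigint, string) schema and the literal values are illustrative assumptions.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class LazyBinaryRoundTripSketch {

    public static void main(String[] args) throws Exception {
        // Illustrative two-column schema: (bigint, string).
        TypeInfo[] typeInfos = new TypeInfo[] {
            TypeInfoFactory.longTypeInfo, TypeInfoFactory.stringTypeInfo };

        // Serialize one row of (42L, "hello") into a byte buffer.
        LazyBinarySerializeWrite serializeWrite = new LazyBinarySerializeWrite(typeInfos.length);
        Output output = new Output();
        serializeWrite.set(output);
        serializeWrite.writeLong(42L);
        byte[] s = "hello".getBytes(StandardCharsets.UTF_8);
        serializeWrite.writeString(s, 0, s.length);

        // Deserialize the row back, field by field; readNextField() returns false for NULLs.
        LazyBinaryDeserializeRead deserializeRead =
            new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
        deserializeRead.set(output.getData(), 0, output.getLength());
        if (deserializeRead.readNextField()) {
            System.out.println("bigint field: " + deserializeRead.currentLong);
        }
        if (deserializeRead.readNextField()) {
            System.out.println("string field: " + new String(deserializeRead.currentBytes,
                deserializeRead.currentBytesStart, deserializeRead.currentBytesLength,
                StandardCharsets.UTF_8));
        }
    }
}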

Example 12 with LazyBinaryDeserializeRead

use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.

the class VectorMapJoinCommonOperator method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    VectorExpression.doTransientInit(bigTableFilterExpressions, hconf);
    VectorExpression.doTransientInit(bigTableKeyExpressions, hconf);
    VectorExpression.doTransientInit(bigTableValueExpressions, hconf);
    /*
     * Get configuration parameters.
     */
    overflowRepeatedThreshold = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD);
    useOverflowRepeatedThreshold = (overflowRepeatedThreshold >= 0);
    /*
     * Create our vectorized copy row and deserialize row helper objects.
     */
    if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) {
        initializeFullOuterObjects();
    }
    if (smallTableValueMapping.getCount() > 0) {
        smallTableValueVectorDeserializeRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(
            new LazyBinaryDeserializeRead(smallTableValueMapping.getTypeInfos(), /* useExternalBuffer */ true));
        smallTableValueVectorDeserializeRow.init(smallTableValueMapping.getOutputColumns());
    }
    if (bigTableRetainColumnMap.length > 0) {
        bigTableRetainedVectorCopy = new VectorCopyRow();
        bigTableRetainedVectorCopy.init(bigTableRetainColumnMap, bigTableRetainTypeInfos);
    }
    if (nonOuterSmallTableKeyColumnMap.length > 0) {
        nonOuterSmallTableKeyVectorCopy = new VectorCopyRow();
        nonOuterSmallTableKeyVectorCopy.init(nonOuterSmallTableKeyColumnMap, nonOuterSmallTableKeyTypeInfos);
    }
    if (outerSmallTableKeyMapping.getCount() > 0) {
        outerSmallTableKeyVectorCopy = new VectorCopyRow();
        outerSmallTableKeyVectorCopy.init(outerSmallTableKeyMapping);
    }
    /*
     * Setup the overflow batch.
     */
    overflowBatch = setupOverflowBatch();
    needCommonSetup = true;
    needFirstBatchSetup = true;
    needHashTableSetup = true;
    if (LOG.isDebugEnabled()) {
        int[] currentScratchColumns = vOutContext.currentScratchColumns();
        LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp currentScratchColumns " + Arrays.toString(currentScratchColumns));
        StructObjectInspector structOutputObjectInspector = (StructObjectInspector) outputObjInspector;
        List<? extends StructField> fields = structOutputObjectInspector.getAllStructFieldRefs();
        int i = 0;
        for (StructField field : fields) {
            LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp " + i + " field " + field.getFieldName() + " type " + field.getFieldObjectInspector().getTypeName());
            i++;
        }
    }
}
Also used : VectorCopyRow(org.apache.hadoop.hive.ql.exec.vector.VectorCopyRow) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
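
Note: initializeOp only constructs and init()s smallTableValueVectorDeserializeRow; the per-row work happens later in the concrete join operators. As a hedged sketch of how such a VectorDeserializeRow is typically driven once its output columns are mapped (the row bytes and the target batch position are assumed inputs, not fields of this class):

import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;

public class SmallTableValueDeserializeSketch {

    // Point the reader at one serialized row's bytes, then materialize its fields
    // into the batch columns that init() mapped. All parameters are assumed inputs.
    static void deserializeSmallTableValue(
            VectorDeserializeRow<LazyBinaryDeserializeRead> deserializeRow,
            byte[] bytes, int offset, int length,
            VectorizedRowBatch batch, int batchIndex) throws IOException {
        deserializeRow.setBytes(bytes, offset, length);
        deserializeRow.deserialize(batch, batchIndex);
    }
}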

Example 13 with LazyBinaryDeserializeRead

use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.

the class SparkReduceRecordHandler method init.

@Override
@SuppressWarnings("unchecked")
public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception {
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
    super.init(job, output, reporter);
    rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector keyObjectInspector;
    ReduceWork gWork = Utilities.getReduceWork(job);
    reducer = gWork.getReducer();
    vectorized = gWork.getVectorMode();
    // Clear out any parents, as the reducer is the root.
    reducer.setParentOperators(null);
    batchContext = gWork.getVectorizedRowBatchCtx();
    isTagged = gWork.getNeedsTagging();
    try {
        keyTableDesc = gWork.getKeyDesc();
        inputKeySerDe = ReflectionUtils.newInstance(keyTableDesc.getSerDeClass(), null);
        inputKeySerDe.initialize(null, keyTableDesc.getProperties(), null);
        keyObjectInspector = inputKeySerDe.getObjectInspector();
        valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
        if (vectorized) {
            final int maxTags = gWork.getTagToValueDesc().size();
            // CONSIDER: Cleaning up this code and eliminating the arrays.  Vectorization only handles
            // one operator tree.
            Preconditions.checkState(maxTags == 1);
            keyStructInspector = (StructObjectInspector) keyObjectInspector;
            firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
            buffer = new DataOutputBuffer();
        }
        for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
            // We should initialize the SerDe with the TypeInfo when available.
            valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
            AbstractSerDe inputValueSerDe = ReflectionUtils.newInstance(valueTableDesc[tag].getSerDeClass(), null);
            inputValueSerDe.initialize(null, valueTableDesc[tag].getProperties(), null);
            inputValueDeserializer[tag] = inputValueSerDe;
            valueObjectInspector[tag] = inputValueSerDe.getObjectInspector();
            ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
            if (vectorized) {
                /* vectorization only works with struct object inspectors */
                valueStructInspector = (StructObjectInspector) valueObjectInspector[tag];
                final int totalColumns = firstValueColumnOffset + valueStructInspector.getAllStructFieldRefs().size();
                rowObjectInspector[tag] = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspector);
                batch = gWork.getVectorizedRowBatchCtx().createVectorizedRowBatch();
                // Setup vectorized deserialization for the key and value.
                BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeySerDe;
                keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(
                    new BinarySortableDeserializeRead(
                        VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
                        (batchContext.getRowdataTypePhysicalVariations().length > firstValueColumnOffset)
                            ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(), 0, firstValueColumnOffset)
                            : batchContext.getRowdataTypePhysicalVariations(),
                        /* useExternalBuffer */ true,
                        binarySortableSerDe.getSortOrders(),
                        binarySortableSerDe.getNullMarkers(),
                        binarySortableSerDe.getNotNullMarkers()));
                keyBinarySortableDeserializeToRow.init(0);
                final int valuesSize = valueStructInspector.getAllStructFieldRefs().size();
                if (valuesSize > 0) {
                    valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(
                        new LazyBinaryDeserializeRead(
                            VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspector),
                            (batchContext.getRowdataTypePhysicalVariations().length >= totalColumns)
                                ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(), firstValueColumnOffset, totalColumns)
                                : null,
                            /* useExternalBuffer */ true));
                    valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
                    // Create data buffers for value bytes column vectors.
                    for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
                        ColumnVector colVector = batch.cols[i];
                        if (colVector instanceof BytesColumnVector) {
                            BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
                            bytesColumnVector.initBuffer();
                        }
                    }
                }
            } else {
                ois.add(keyObjectInspector);
                ois.add(valueObjectInspector[tag]);
                // reducer.setGroupKeyObjectInspector(keyObjectInspector);
                rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    ExecMapperContext execContext = new ExecMapperContext(job);
    localWork = gWork.getMapRedLocalWork();
    execContext.setJc(jc);
    execContext.setLocalWork(localWork);
    reducer.passExecContext(execContext);
    reducer.setReporter(rp);
    OperatorUtils.setChildrenCollector(Arrays.<Operator<? extends OperatorDesc>>asList(reducer), output);
    // initialize reduce operator tree
    try {
        LOG.info(reducer.dump(0));
        reducer.initialize(jc, rowObjectInspector);
        if (localWork != null) {
            for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
                dummyOp.setExecContext(execContext);
                dummyOp.initialize(jc, null);
            }
        }
    } catch (Throwable e) {
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Reduce operator initialization failed", e);
        }
    }
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
Also used : ExecMapperContext(org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) BinarySortableSerDe(org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) ArrayList(java.util.ArrayList) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)
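
Note: the key side deliberately uses BinarySortableDeserializeRead while the value side uses LazyBinaryDeserializeRead. Reduce keys pass through the shuffle sort, and the BinarySortable format is designed so that comparing the raw serialized bytes reproduces value order. A minimal sketch of that property, assuming a single int key column with the default ascending, nulls-first ordering (the key values are illustrative):

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;

public class BinarySortableOrderSketch {

    // Unsigned lexicographic comparison, the same ordering a sort/shuffle applies
    // to opaque key bytes.
    static int compareBytes(byte[] a, int aLen, byte[] b, int bLen) {
        int n = Math.min(aLen, bLen);
        for (int i = 0; i < n; i++) {
            int d = (a[i] & 0xff) - (b[i] & 0xff);
            if (d != 0) {
                return d;
            }
        }
        return aLen - bLen;
    }

    public static void main(String[] args) throws Exception {
        // One int key column; the defaults match the reading side's
        // BinarySortableDeserializeRead.ascendingNullsFirst.
        BinarySortableSerializeWrite serializeWrite = new BinarySortableSerializeWrite(1);

        Output smaller = new Output();
        serializeWrite.set(smaller);
        serializeWrite.writeInt(-5);

        Output larger = new Output();
        serializeWrite.set(larger);
        serializeWrite.writeInt(7);

        // Byte order agrees with numeric order: prints true.
        System.out.println(compareBytes(smaller.getData(), smaller.getLength(),
            larger.getData(), larger.getLength()) < 0);
    }
}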

Example 14 with LazyBinaryDeserializeRead

use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.

the class CheckFastRowHashMap method verifyHashMapRowsMore.

public static void verifyHashMapRowsMore(List<Object[]> rows, int[] actualToValueMap, VectorMapJoinHashMapResult hashMapResult, TypeInfo[] typeInfos, int clipIndex, boolean useExactBytes) throws IOException {
    String debugExceptionMessage = null;
    StackTraceElement[] debugStackTrace = null;
    final int count = rows.size();
    final int columnCount = typeInfos.length;
    WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
    for (int a = 0; a < count; a++) {
        int valueIndex = actualToValueMap[a];
        Object[] row = rows.get(valueIndex);
        byte[] bytes = ref.getBytes();
        int offset = (int) ref.getOffset();
        int length = ref.getLength();
        if (a == clipIndex) {
            length--;
        }
        if (useExactBytes) {
            // Use exact byte array which might generate array out of bounds...
            bytes = Arrays.copyOfRange(bytes, offset, offset + length);
            offset = 0;
        }
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
            new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
        lazyBinaryDeserializeRead.set(bytes, offset, length);
        boolean thrown = false;
        Exception saveException = null;
        int index = 0;
        try {
            for (index = 0; index < columnCount; index++) {
                verifyRead(lazyBinaryDeserializeRead, typeInfos[index], row[index]);
            }
        } catch (Exception e) {
            thrown = true;
            saveException = e;
            // Exercise the detailed position debug strings; their return values are
            // not used by the assertions below.
            lazyBinaryDeserializeRead.getDetailedReadPositionString();
            hashMapResult.getDetailedHashMapResultPositionString();
            debugExceptionMessage = saveException.getMessage();
            debugStackTrace = saveException.getStackTrace();
        }
        if (a == clipIndex) {
            if (!thrown) {
                TestCase.fail("Expecting an exception to be thrown for the clipped case...");
            } else {
                TestCase.assertTrue(saveException != null);
                if (saveException instanceof EOFException) {
                // This is the one we are expecting.
                } else if (saveException instanceof ArrayIndexOutOfBoundsException) {
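                // An array-bounds overrun is also acceptable for the clipped case.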
                } else {
                    TestCase.fail("Expecting an EOFException to be thrown for the clipped case...");
                }
            }
        } else {
            if (thrown) {
                TestCase.fail("Not expecting an exception to be thrown for the non-clipped case... " + " exception message " + debugExceptionMessage + " stack trace " + getStackTraceAsSingleLine(debugStackTrace));
            }
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
        ref = hashMapResult.next();
        if (a == count - 1) {
            TestCase.assertTrue(ref == null);
        } else {
            TestCase.assertTrue(ref != null);
        }
    }
}
Also used : WriteBuffers(org.apache.hadoop.hive.serde2.WriteBuffers) IOException(java.io.IOException) EOFException(java.io.EOFException) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)
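
Note: the clipIndex branch above expects truncated bytes to surface as an EOFException (or an array-bounds overrun) while reading. A minimal standalone sketch of that behavior, assuming a single string column; whether a clipped read actually throws can depend on the field layout, which is why the test asserts it only for the deliberately clipped row:

import java.io.EOFException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ClippedReadSketch {

    public static void main(String[] args) throws Exception {
        TypeInfo[] typeInfos = new TypeInfo[] { TypeInfoFactory.stringTypeInfo };

        // Serialize a single string field.
        LazyBinarySerializeWrite serializeWrite = new LazyBinarySerializeWrite(1);
        Output output = new Output();
        serializeWrite.set(output);
        byte[] s = "clipped".getBytes(StandardCharsets.UTF_8);
        serializeWrite.writeString(s, 0, s.length);

        // Hand the reader a buffer that is one byte short.
        LazyBinaryDeserializeRead deserializeRead =
            new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
        deserializeRead.set(output.getData(), 0, output.getLength() - 1);
        try {
            deserializeRead.readNextField();
            System.out.println("read succeeded (possible for some layouts)");
        } catch (EOFException | ArrayIndexOutOfBoundsException e) {
            System.out.println("clipped read failed as the test expects: " + e);
        }
    }
}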

Aggregations

LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead): 14 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 9 usages
BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead): 8 usages
LazyBinarySerializeWrite (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite): 8 usages
UnionObject (org.apache.hadoop.hive.serde2.objectinspector.UnionObject): 6 usages
IOException (java.io.IOException): 5 usages
Output (org.apache.hadoop.hive.serde2.ByteStream.Output): 5 usages
BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite): 5 usages
DeserializeRead (org.apache.hadoop.hive.serde2.fast.DeserializeRead): 5 usages
SerializeWrite (org.apache.hadoop.hive.serde2.fast.SerializeWrite): 5 usages
LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters): 5 usages
LazySimpleDeserializeRead (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead): 5 usages
LazySimpleSerializeWrite (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite): 5 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 4 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 4 usages
ArrayList (java.util.ArrayList): 3 usages
Properties (java.util.Properties): 3 usages
Configuration (org.apache.hadoop.conf.Configuration): 3 usages
BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe): 3 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 3 usages