Search in sources :

Example 1 with BinarySortableSerializeWrite

use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite in project hive by apache.

the class TestVectorSerDeRow method testVectorSerializeRow.

void testVectorSerializeRow(Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException {
    String[] emptyScratchTypeNames = new String[0];
    VectorRandomRowSource source = new VectorRandomRowSource();
    source.init(r);
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
    batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    VectorAssignRow vectorAssignRow = new VectorAssignRow();
    vectorAssignRow.init(source.typeNames());
    int fieldCount = source.typeNames().size();
    DeserializeRead deserializeRead;
    SerializeWrite serializeWrite;
    switch(serializationType) {
        case BINARY_SORTABLE:
            deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */
            false);
            serializeWrite = new BinarySortableSerializeWrite(fieldCount);
            break;
        case LAZY_BINARY:
            deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */
            false);
            serializeWrite = new LazyBinarySerializeWrite(fieldCount);
            break;
        case LAZY_SIMPLE:
            {
                StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
                LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector);
                byte separator = (byte) '\t';
                deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */
                false, separator, lazySerDeParams);
                serializeWrite = new LazySimpleSerializeWrite(fieldCount, separator, lazySerDeParams);
            }
            break;
        default:
            throw new Error("Unknown serialization type " + serializationType);
    }
    VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(serializeWrite);
    vectorSerializeRow.init(source.typeNames());
    Object[][] randomRows = source.randomRows(100000);
    int firstRandomRowIndex = 0;
    for (int i = 0; i < randomRows.length; i++) {
        Object[] row = randomRows[i];
        vectorAssignRow.assignRow(batch, batch.size, row);
        batch.size++;
        if (batch.size == batch.DEFAULT_SIZE) {
            serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
            firstRandomRowIndex = i + 1;
            batch.reset();
        }
    }
    if (batch.size > 0) {
        serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
    }
}
Also used : LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) DeserializeRead(org.apache.hadoop.hive.serde2.fast.DeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) SerializeWrite(org.apache.hadoop.hive.serde2.fast.SerializeWrite) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 2 with BinarySortableSerializeWrite

use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite in project hive by apache.

the class TestVectorMapJoinFastRowHashMap method addAndVerifyRows.

private void addAndVerifyRows(VectorRandomRowSource valueSource, Object[][] rows, VectorMapJoinFastHashTable map, HashTableKeyType hashTableKeyType, VerifyFastRowHashMap verifyTable, String[] keyTypeNames, boolean doClipping, boolean useExactBytes) throws HiveException, IOException, SerDeException {
    final int keyCount = keyTypeNames.length;
    PrimitiveTypeInfo[] keyPrimitiveTypeInfos = new PrimitiveTypeInfo[keyCount];
    PrimitiveCategory[] keyPrimitiveCategories = new PrimitiveCategory[keyCount];
    ArrayList<ObjectInspector> keyPrimitiveObjectInspectorList = new ArrayList<ObjectInspector>(keyCount);
    for (int i = 0; i < keyCount; i++) {
        PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(keyTypeNames[i]);
        keyPrimitiveTypeInfos[i] = primitiveTypeInfo;
        PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
        keyPrimitiveCategories[i] = primitiveCategory;
        keyPrimitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
    }
    boolean[] keyColumnSortOrderIsDesc = new boolean[keyCount];
    Arrays.fill(keyColumnSortOrderIsDesc, false);
    byte[] keyColumnNullMarker = new byte[keyCount];
    Arrays.fill(keyColumnNullMarker, BinarySortableSerDe.ZERO);
    byte[] keyColumnNotNullMarker = new byte[keyCount];
    Arrays.fill(keyColumnNotNullMarker, BinarySortableSerDe.ONE);
    BinarySortableSerializeWrite keySerializeWrite = new BinarySortableSerializeWrite(keyColumnSortOrderIsDesc, keyColumnNullMarker, keyColumnNotNullMarker);
    PrimitiveTypeInfo[] valuePrimitiveTypeInfos = valueSource.primitiveTypeInfos();
    final int columnCount = valuePrimitiveTypeInfos.length;
    SerializeWrite valueSerializeWrite = new LazyBinarySerializeWrite(columnCount);
    final int count = rows.length;
    for (int i = 0; i < count; i++) {
        Object[] valueRow = rows[i];
        Output valueOutput = new Output();
        ((LazyBinarySerializeWrite) valueSerializeWrite).set(valueOutput);
        for (int index = 0; index < columnCount; index++) {
            Writable writable = (Writable) valueRow[index];
            VerifyFastRow.serializeWrite(valueSerializeWrite, valuePrimitiveTypeInfos[index], writable);
        }
        byte[] value = Arrays.copyOf(valueOutput.getData(), valueOutput.getLength());
        // Add a new key or add a value to an existing key?
        byte[] key;
        if (random.nextBoolean() || verifyTable.getCount() == 0) {
            Object[] keyRow = VectorRandomRowSource.randomRow(keyCount, random, keyPrimitiveObjectInspectorList, keyPrimitiveCategories, keyPrimitiveTypeInfos);
            Output keyOutput = new Output();
            keySerializeWrite.set(keyOutput);
            for (int index = 0; index < keyCount; index++) {
                Writable writable = (Writable) keyRow[index];
                VerifyFastRow.serializeWrite(keySerializeWrite, keyPrimitiveTypeInfos[index], writable);
            }
            key = Arrays.copyOf(keyOutput.getData(), keyOutput.getLength());
            verifyTable.add(key, keyRow, value, valueRow);
        } else {
            key = verifyTable.addRandomExisting(value, valueRow, random);
        }
        // Serialize keyRow into key bytes.
        BytesWritable keyWritable = new BytesWritable(key);
        BytesWritable valueWritable = new BytesWritable(value);
        map.putRow(keyWritable, valueWritable);
    // verifyTable.verify(map);
    }
    verifyTable.verify(map, hashTableKeyType, valuePrimitiveTypeInfos, doClipping, useExactBytes, random);
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) SerializeWrite(org.apache.hadoop.hive.serde2.fast.SerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite)

Example 3 with BinarySortableSerializeWrite

use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite in project hive by apache.

the class VectorizationContext method getStructInExpression.

private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr, TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    VectorExpression expr = null;
    StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;
    ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    final int fieldCount = fieldTypeInfos.size();
    ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
    InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
    for (int f = 0; f < fieldCount; f++) {
        TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
        // Only primitive fields supports for now.
        if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
            return null;
        }
        // We are going to serialize using the 4 basic types.
        ColumnVector.Type fieldVectorColumnType = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
        fieldVectorColumnTypes[f] = fieldVectorColumnType;
        // We currently evaluate the IN (..) constants in special ways.
        PrimitiveCategory fieldPrimitiveCategory = ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
        InConstantType inConstantType = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory);
        fieldInConstantTypes[f] = inConstantType;
    }
    Output buffer = new Output();
    BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(fieldCount);
    final int inChildrenCount = inChildren.size();
    byte[][] serializedInChildren = new byte[inChildrenCount][];
    try {
        for (int i = 0; i < inChildrenCount; i++) {
            final ExprNodeDesc node = inChildren.get(i);
            final Object[] constants;
            if (node instanceof ExprNodeConstantDesc) {
                ExprNodeConstantDesc constNode = (ExprNodeConstantDesc) node;
                ConstantObjectInspector output = constNode.getWritableObjectInspector();
                constants = ((List<?>) output.getWritableConstantValue()).toArray();
            } else {
                ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) node;
                ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprNode);
                ObjectInspector output = evaluator.initialize(exprNode.getWritableObjectInspector());
                constants = (Object[]) evaluator.evaluate(null);
            }
            binarySortableSerializeWrite.set(buffer);
            for (int f = 0; f < fieldCount; f++) {
                Object constant = constants[f];
                if (constant == null) {
                    binarySortableSerializeWrite.writeNull();
                } else {
                    InConstantType inConstantType = fieldInConstantTypes[f];
                    switch(inConstantType) {
                        case STRING_FAMILY:
                            {
                                byte[] bytes;
                                if (constant instanceof Text) {
                                    Text text = (Text) constant;
                                    bytes = text.getBytes();
                                    binarySortableSerializeWrite.writeString(bytes, 0, text.getLength());
                                } else {
                                    throw new HiveException("Unexpected constant String type " + constant.getClass().getSimpleName());
                                }
                            }
                            break;
                        case INT_FAMILY:
                            {
                                long value;
                                if (constant instanceof IntWritable) {
                                    value = ((IntWritable) constant).get();
                                } else if (constant instanceof LongWritable) {
                                    value = ((LongWritable) constant).get();
                                } else {
                                    throw new HiveException("Unexpected constant Long type " + constant.getClass().getSimpleName());
                                }
                                binarySortableSerializeWrite.writeLong(value);
                            }
                            break;
                        case FLOAT_FAMILY:
                            {
                                double value;
                                if (constant instanceof DoubleWritable) {
                                    value = ((DoubleWritable) constant).get();
                                } else {
                                    throw new HiveException("Unexpected constant Double type " + constant.getClass().getSimpleName());
                                }
                                binarySortableSerializeWrite.writeDouble(value);
                            }
                            break;
                        // UNDONE...
                        case DATE:
                        case TIMESTAMP:
                        case DECIMAL:
                        default:
                            throw new RuntimeException("Unexpected IN constant type " + inConstantType.name());
                    }
                }
            }
            serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
        }
    } catch (Exception e) {
        throw new HiveException(e);
    }
    // Create a single child representing the scratch column where we will
    // generate the serialized keys of the batch.
    int scratchBytesCol = ocm.allocateOutputColumn(TypeInfoFactory.stringTypeInfo);
    Class<?> cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStructColumnInList.class : StructColumnInList.class);
    expr = createVectorExpression(cl, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
    ((IStringInExpr) expr).setInListValues(serializedInChildren);
    ((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol);
    ((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(), fieldVectorColumnTypes);
    return expr;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongWritable(org.apache.hadoop.io.LongWritable) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) IntWritable(org.apache.hadoop.io.IntWritable) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Text(org.apache.hadoop.io.Text) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InputExpressionType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType) ArgumentType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector)

Example 4 with BinarySortableSerializeWrite

use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite in project hive by apache.

the class VectorReduceSinkCommonOperator method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    if (LOG.isDebugEnabled()) {
        // Determine the name of our map or reduce task for debug tracing.
        BaseWork work = Utilities.getMapWork(hconf);
        if (work == null) {
            work = Utilities.getReduceWork(hconf);
        }
        taskName = work.getName();
    }
    String context = hconf.get(Operator.CONTEXT_NAME_KEY, "");
    if (context != null && !context.isEmpty()) {
        context = "_" + context.replace(" ", "_");
    }
    statsMap.put(Counter.RECORDS_OUT_INTERMEDIATE + context, recordCounter);
    reduceSkipTag = conf.getSkipTag();
    reduceTagByte = (byte) conf.getTag();
    if (isLogInfoEnabled) {
        LOG.info("Using tag = " + (int) reduceTagByte);
    }
    TableDesc keyTableDesc = conf.getKeySerializeInfo();
    boolean[] columnSortOrder = getColumnSortOrder(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length);
    byte[] columnNullMarker = getColumnNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder);
    byte[] columnNotNullMarker = getColumnNotNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder);
    keyBinarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrder, columnNullMarker, columnNotNullMarker);
    // Create all nulls key.
    try {
        Output nullKeyOutput = new Output();
        keyBinarySortableSerializeWrite.set(nullKeyOutput);
        for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
            keyBinarySortableSerializeWrite.writeNull();
        }
        int nullBytesLength = nullKeyOutput.getLength();
        nullBytes = new byte[nullBytesLength];
        System.arraycopy(nullKeyOutput.getData(), 0, nullBytes, 0, nullBytesLength);
        nullKeyHashCode = HashCodeUtil.calculateBytesHashCode(nullBytes, 0, nullBytesLength);
    } catch (Exception e) {
        throw new HiveException(e);
    }
    valueLazyBinarySerializeWrite = new LazyBinarySerializeWrite(reduceSinkValueColumnMap.length);
    valueVectorSerializeRow = new VectorSerializeRow<LazyBinarySerializeWrite>(valueLazyBinarySerializeWrite);
    valueVectorSerializeRow.init(reduceSinkValueTypeInfos, reduceSinkValueColumnMap);
    valueOutput = new Output();
    valueVectorSerializeRow.setOutput(valueOutput);
    keyWritable = new HiveKey();
    valueBytesWritable = new BytesWritable();
    batchCounter = 0;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HiveKey(org.apache.hadoop.hive.ql.io.HiveKey) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)

Example 5 with BinarySortableSerializeWrite

use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite in project hive by apache.

the class VectorMapJoinLeftSemiMultiKeyOperator method process.

//---------------------------------------------------------------------------
// Process Multi-Key Left-Semi Join on a vectorized row batch.
//
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
            commonSetup(batch);
            /*
         * Initialize Multi-Key members for this specialized class.
         */
            keyVectorSerializeWrite = new VectorSerializeRow(new BinarySortableSerializeWrite(bigTableKeyColumnMap.length));
            keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap);
            currentKeyOutput = new Output();
            saveKeyOutput = new Output();
            needCommonSetup = false;
        }
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
            /*
         * Get our Multi-Key hash set information for this specialized class.
         */
            hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable;
            needHashTableSetup = false;
        }
        batchCounter++;
        // For left semi joins, we may apply the filter(s) now.
        for (VectorExpression ve : bigTableFilterExpressions) {
            ve.evaluate(batch);
        }
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (isLogDebugEnabled) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        /*
       * Multi-Key specific declarations.
       */
        // None.
        /*
       * Multi-Key Long check for repeating.
       */
        // If all BigTable input columns to key expressions are isRepeating, then
        // calculate key once; lookup once.
        boolean allKeyInputColumnsRepeating;
        if (bigTableKeyColumnMap.length == 0) {
            allKeyInputColumnsRepeating = false;
        } else {
            allKeyInputColumnsRepeating = true;
            for (int i = 0; i < bigTableKeyColumnMap.length; i++) {
                if (!batch.cols[bigTableKeyColumnMap[i]].isRepeating) {
                    allKeyInputColumnsRepeating = false;
                    break;
                }
            }
        }
        if (allKeyInputColumnsRepeating) {
            /*
         * Repeating.
         */
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
            /*
         * Multi-Key specific repeated lookup.
         */
            keyVectorSerializeWrite.setOutput(currentKeyOutput);
            keyVectorSerializeWrite.serializeWrite(batch, 0);
            JoinUtil.JoinResult joinResult;
            if (keyVectorSerializeWrite.getHasAnyNulls()) {
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                byte[] keyBytes = currentKeyOutput.getData();
                int keyLength = currentKeyOutput.getLength();
                // LOG.debug(CLASS_NAME + " processOp all " + displayBytes(keyBytes, 0, keyLength));
                joinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[0]);
            }
            if (isLogDebugEnabled) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
            }
            finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
        } else {
            if (isLogDebugEnabled) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            }
            // We remember any matching rows in matchs / matchSize.  At the end of the loop,
            // selected / batch.size will represent both matching and non-matching rows for outer join.
            // Only deferred rows will have been removed from selected.
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashSetResultCount = 0;
            int allMatchCount = 0;
            int spillCount = 0;
            /*
         * Multi-Key specific variables.
         */
            Output temp;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < inputLogicalSize; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                /*
           * Multi-Key get key.
           */
                // Generate binary sortable key for current row in vectorized row batch.
                keyVectorSerializeWrite.setOutput(currentKeyOutput);
                keyVectorSerializeWrite.serializeWrite(batch, batchIndex);
                boolean isAnyNull = keyVectorSerializeWrite.getHasAnyNulls();
                if (isAnyNull || !haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) {
                    if (haveSaveKey) {
                        // Move on with our counts.
                        switch(saveJoinResult) {
                            case MATCH:
                                // We have extracted the existence from the hash set result, so we don't keep it.
                                break;
                            case SPILL:
                                // We keep the hash set result for its spill information.
                                hashSetResultCount++;
                                break;
                            case NOMATCH:
                                break;
                        }
                    }
                    if (isAnyNull) {
                        saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        haveSaveKey = false;
                    } else {
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
                        /*
               * Multi-Key specific save key and lookup.
               */
                        temp = saveKeyOutput;
                        saveKeyOutput = currentKeyOutput;
                        currentKeyOutput = temp;
                        /*
               * Multi-key specific lookup key.
               */
                        byte[] keyBytes = saveKeyOutput.getData();
                        int keyLength = saveKeyOutput.getLength();
                        saveJoinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[hashSetResultCount]);
                    }
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                            break;
                    }
                } else {
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                            break;
                    }
                }
            }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        // We have extracted the existence from the hash set result, so we don't keep it.
                        break;
                    case SPILL:
                        // We keep the hash set result for its spill information.
                        hashSetResultCount++;
                        break;
                    case NOMATCH:
                        break;
                }
            }
            if (isLogDebugEnabled) {
                LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
            }
            finishLeftSemi(batch, allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults);
        }
        if (batch.size > 0) {
            // Forward any remaining selected rows.
            forwardBigTableBatch(batch);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used : VectorMapJoinHashTableResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult) JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) VectorSerializeRow(org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Aggregations

BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite)12 Output (org.apache.hadoop.hive.serde2.ByteStream.Output)11 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)9 IOException (java.io.IOException)6 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)4 VectorSerializeRow (org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow)4 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)4 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)4 LazyBinarySerializeWrite (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite)4 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)3 BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead)3 SerializeWrite (org.apache.hadoop.hive.serde2.fast.SerializeWrite)3 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)3 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)3 BytesWritable (org.apache.hadoop.io.BytesWritable)3 ArrayList (java.util.ArrayList)2 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)2 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)2 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)2 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)2