Search in sources :

Example 81 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinInnerBigOnlyStringOperator method process.

// ---------------------------------------------------------------------------
// Process Single-Column String Inner Big-Only Join on a vectorized row batch.
/**
 * Processes one vectorized row batch for the single-column string inner big-only join.
 *
 * <p>The incoming {@code row} is cast to a {@code VectorizedRowBatch}. When the join key column
 * is repeating, a single {@code hashMultiSet.contains} call on entry 0 produces the join result
 * that is handed to {@code finishInnerBigOnlyRepeated}. Otherwise every logical row is visited
 * and a new lookup is issued only when the row's key is not equal to the saved key of the
 * preceding row; the collected counts are handed to {@code finishInnerBigOnly}.
 *
 * <p>NOTE(review): this listing looks like an abridged excerpt -- closing braces, block-comment
 * delimiters, loop bodies and the statements between switch cases are not visible here. Confirm
 * details against the complete class before relying on them.
 *
 * @param row the batch to process; it is cast to {@code VectorizedRowBatch}
 * @param tag narrowed to {@code byte} and stored in {@code alias}
 * @throws HiveException wrapping any {@code IOException} or other {@code Exception} that is caught
 */
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
         * Initialize Single-Column String members for this specialized class.
            singleJoinColumn = bigTableKeyColumnMap[0];
            needCommonSetup = false;
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
         * Get our Single-Column String hash multi-set information for this specialized class.
            hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable;
            needHashTableSetup = false;
        // For inner joins, we may apply the filter(s) now.
        // NOTE(review): the body of this loop is not visible in this listing.
        for (VectorExpression ve : bigTableFilterExpressions) {
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
        // We rebuild in-place the selected array with rows destined to be forwarded.
        int numSel = 0;
       * Single-Column String specific declarations.
        // The one join column for this specialized class.
        BytesColumnVector joinColVector = (BytesColumnVector) batch.cols[singleJoinColumn];
        byte[][] vector = joinColVector.vector;
        int[] start = joinColVector.start;
        int[] length = joinColVector.length;
       * Single-Column String check for repeating.
        // Check single column for repeating.
        boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
        if (allKeyInputColumnsRepeating) {
         * Repeating.
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
         * Single-Column String specific repeated lookup.
            JoinUtil.JoinResult joinResult;
            // A null repeated key is NOMATCH without consulting the hash multi-set.
            if (!joinColVector.noNulls && joinColVector.isNull[0]) {
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                byte[] keyBytes = vector[0];
                int keyStart = start[0];
                int keyLength = length[0];
                joinResult = hashMultiSet.contains(keyBytes, keyStart, keyLength, hashMultiSetResults[0]);
            if (LOG.isDebugEnabled()) {
                // NOTE(review): the tail of this log expression is missing from the listing.
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " +;
            finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            // We remember any matching rows in matchs / matchSize.  At the end of the loop,
            // selected / batch.size will represent both matching and non-matching rows for outer join.
            // Only deferred rows will have been removed from selected.
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashMultiSetResultCount = 0;
            int allMatchCount = 0;
            int equalKeySeriesCount = 0;
            int spillCount = 0;
         * Single-Column String specific variables.
            // Batch index of the row whose key bytes serve as the saved key; -1 until one is saved.
            int saveKeyBatchIndex = -1;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < inputLogicalSize; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
           * Single-Column String get key.
                // Implicit -- use batchIndex.
                boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex];
                // A new key series starts when this key is null, no key is saved yet, or the
                // bytes differ from the saved key.
                if (isNull || !haveSaveKey || StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex], vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
                    if (haveSaveKey) {
                        // Move on with our counts.
                        // NOTE(review): the statements for each case are not visible in this
                        // listing; presumably counter updates and breaks -- confirm.
                        switch(saveJoinResult) {
                            case MATCH:
                                // We have extracted the count from the hash multi-set result, so we don't keep it.
                            case SPILL:
                                // We keep the hash multi-set result for its spill information.
                            case NOMATCH:
                    // A null key is recorded as NOMATCH and clears the saved key.
                    if (isNull) {
                        saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        haveSaveKey = false;
                    } else {
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
               * Single-Column String specific save key.
                        saveKeyBatchIndex = batchIndex;
               * Single-Column String specific lookup key.
                        byte[] keyBytes = vector[batchIndex];
                        int keyStart = start[batchIndex];
                        int keyLength = length[batchIndex];
                        saveJoinResult = hashMultiSet.contains(keyBytes, keyStart, keyLength, hashMultiSetResults[hashMultiSetResultCount]);
                    // Record the first row of the new key series according to its join result.
                    // NOTE(review): no break statements are visible between the cases here;
                    // presumably elided from the listing -- confirm.
                    switch(saveJoinResult) {
                        case MATCH:
                            equalKeySeriesValueCounts[equalKeySeriesCount] = hashMultiSetResults[hashMultiSetResultCount].count();
                            equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
                            equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashMultiSetResultCount;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                } else {
                    // Key equals the saved key: reuse saveJoinResult instead of looking it up again.
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashMultiSetResultCount;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        // We have extracted the count from the hash multi-set result, so we don't keep it.
                    case SPILL:
                        // We keep the hash multi-set result for its spill information.
                    case NOMATCH:
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesValueCounts " + longArrayToRangesString(equalKeySeriesValueCounts, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesDuplicateCounts " + intArrayToRangesString(equalKeySeriesDuplicateCounts, equalKeySeriesCount) + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount)));
            finishInnerBigOnly(batch, allMatchCount, equalKeySeriesCount, spillCount, (VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount);
        if (batch.size > 0) {
            // Forward any remaining selected rows.
    // Both handlers rethrow the failure wrapped in a HiveException.
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
Also used : VectorMapJoinHashTableResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult) JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 82 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinInnerLongOperator method process.

// ---------------------------------------------------------------------------
// Process Single-Column Long Inner Join on a vectorized row batch.
/**
 * Processes one vectorized row batch for the single-column long inner join.
 *
 * <p>The incoming {@code row} is cast to a {@code VectorizedRowBatch}. When the join key column
 * is repeating, entry 0 alone decides the join result that is handed to
 * {@code finishInnerRepeated}. Otherwise every logical row is visited and a new
 * {@code hashMap.lookup} is issued only when the row's key differs from the saved key of the
 * preceding row; the collected counts are handed to {@code finishInner}. When {@code useMinMax}
 * is set, keys outside {@code [min, max]} are treated as NOMATCH without a lookup.
 *
 * <p>NOTE(review): this listing looks like an abridged excerpt -- closing braces, block-comment
 * delimiters, loop bodies and the statements between switch cases are not visible here. Confirm
 * details against the complete class before relying on them.
 *
 * @param row the batch to process; it is cast to {@code VectorizedRowBatch}
 * @param tag narrowed to {@code byte} and stored in {@code alias}
 * @throws HiveException wrapping any {@code IOException} or other {@code Exception} that is caught
 */
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
         * Initialize Single-Column Long members for this specialized class.
            singleJoinColumn = bigTableKeyColumnMap[0];
            needCommonSetup = false;
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
         * Get our Single-Column Long hash map information for this specialized class.
            hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable;
            // min and max are only read from the hash map when it reports useMinMax.
            useMinMax = hashMap.useMinMax();
            if (useMinMax) {
                min = hashMap.min();
                max = hashMap.max();
            needHashTableSetup = false;
        // Do the per-batch setup for an inner join.
        // For inner joins, we may apply the filter(s) now.
        // NOTE(review): the body of this loop is not visible in this listing.
        for (VectorExpression ve : bigTableFilterExpressions) {
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
       * Single-Column Long specific declarations.
        // The one join column for this specialized class.
        LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn];
        long[] vector = joinColVector.vector;
       * Single-Column Long check for repeating.
        // Check single column for repeating.
        boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
        if (allKeyInputColumnsRepeating) {
         * Repeating.
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
         * Single-Column Long specific repeated lookup.
            JoinUtil.JoinResult joinResult;
            // A null repeated key is NOMATCH without consulting the hash map.
            if (!joinColVector.noNulls && joinColVector.isNull[0]) {
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                long key = vector[0];
                if (useMinMax && (key < min || key > max)) {
                    // Out of range for whole batch.
                    joinResult = JoinUtil.JoinResult.NOMATCH;
                } else {
                    joinResult = hashMap.lookup(key, hashMapResults[0]);
            if (LOG.isDebugEnabled()) {
                // NOTE(review): the tail of this log expression is missing from the listing.
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " +;
            finishInnerRepeated(batch, joinResult, hashMapResults[0]);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            // We remember any matching rows in matchs / matchSize.  At the end of the loop,
            // selected / batch.size will represent both matching and non-matching rows for outer join.
            // Only deferred rows will have been removed from selected.
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashMapResultCount = 0;
            int allMatchCount = 0;
            int equalKeySeriesCount = 0;
            int spillCount = 0;
         * Single-Column Long specific variables.
            // Key value of the current series; only meaningful while haveSaveKey is true.
            long saveKey = 0;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < inputLogicalSize; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
           * Single-Column Long get key.
                long currentKey;
                boolean isNull;
                // A null entry gets a placeholder key of 0 and is flagged through isNull.
                if (!joinColVector.noNulls && joinColVector.isNull[batchIndex]) {
                    currentKey = 0;
                    isNull = true;
                } else {
                    currentKey = vector[batchIndex];
                    isNull = false;
                // A new key series starts when this key is null, no key is saved yet, or the
                // value differs from the saved key.
                if (isNull || !haveSaveKey || currentKey != saveKey) {
                    if (haveSaveKey) {
                        // Move on with our counts.
                        // NOTE(review): the statements for each case are not visible in this
                        // listing; presumably counter updates and breaks -- confirm.
                        switch(saveJoinResult) {
                            case MATCH:
                            case SPILL:
                            case NOMATCH:
                    // A null key is recorded as NOMATCH and clears the saved key.
                    if (isNull) {
                        saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        haveSaveKey = false;
                    } else {
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
               * Single-Column Long specific save key.
                        saveKey = currentKey;
                        if (useMinMax && (currentKey < min || currentKey > max)) {
                            // Key out of range for whole hash table.
                            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        } else {
                            saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]);
                    // Record the first row of the new key series according to its join result.
                    // NOTE(review): no break statements are visible between the cases here;
                    // presumably elided from the listing -- confirm.
                    switch(saveJoinResult) {
                        case MATCH:
                            equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
                            equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
                            equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
                            equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashMapResultCount;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                } else {
                    // Key equals the saved key: reuse saveJoinResult instead of looking it up again.
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashMapResultCount;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                    case SPILL:
                    case NOMATCH:
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) + " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
            finishInner(batch, allMatchCount, equalKeySeriesCount, spillCount, hashMapResultCount);
        if (batch.size > 0) {
            // Forward any remaining selected rows.
    // Both handlers rethrow the failure wrapped in a HiveException.
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
Also used : JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 83 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinInnerStringOperator method process.

// ---------------------------------------------------------------------------
// Process Single-Column String Inner Join on a vectorized row batch.
/**
 * Processes one vectorized row batch for the single-column string inner join.
 *
 * <p>The incoming {@code row} is cast to a {@code VectorizedRowBatch}. When the join key column
 * is repeating, a single {@code hashMap.lookup} call on entry 0 produces the join result that is
 * handed to {@code finishInnerRepeated}. Otherwise every logical row is visited and a new lookup
 * is issued only when the row's key is not equal to the saved key of the preceding row; the
 * collected counts are handed to {@code finishInner}.
 *
 * <p>NOTE(review): this listing looks like an abridged excerpt -- closing braces, block-comment
 * delimiters, loop bodies and the statements between switch cases are not visible here. Confirm
 * details against the complete class before relying on them.
 *
 * @param row the batch to process; it is cast to {@code VectorizedRowBatch}
 * @param tag narrowed to {@code byte} and stored in {@code alias}
 * @throws HiveException wrapping any {@code IOException} or other {@code Exception} that is caught
 */
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
         * Initialize Single-Column String members for this specialized class.
            singleJoinColumn = bigTableKeyColumnMap[0];
            needCommonSetup = false;
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
         * Get our Single-Column String hash map information for this specialized class.
            hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable;
            needHashTableSetup = false;
        // Do the per-batch setup for an inner join.
        // For inner joins, we may apply the filter(s) now.
        // NOTE(review): the body of this loop is not visible in this listing.
        for (VectorExpression ve : bigTableFilterExpressions) {
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
       * Single-Column String specific declarations.
        // The one join column for this specialized class.
        BytesColumnVector joinColVector = (BytesColumnVector) batch.cols[singleJoinColumn];
        byte[][] vector = joinColVector.vector;
        int[] start = joinColVector.start;
        int[] length = joinColVector.length;
       * Single-Column String check for repeating.
        // Check single column for repeating.
        boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
        if (allKeyInputColumnsRepeating) {
         * Repeating.
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
         * Single-Column String specific repeated lookup.
            JoinUtil.JoinResult joinResult;
            // A null repeated key is NOMATCH without consulting the hash map.
            if (!joinColVector.noNulls && joinColVector.isNull[0]) {
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                byte[] keyBytes = vector[0];
                int keyStart = start[0];
                int keyLength = length[0];
                joinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[0]);
            if (LOG.isDebugEnabled()) {
                // NOTE(review): the tail of this log expression is missing from the listing.
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " +;
            finishInnerRepeated(batch, joinResult, hashMapResults[0]);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            // We remember any matching rows in matchs / matchSize.  At the end of the loop,
            // selected / batch.size will represent both matching and non-matching rows for outer join.
            // Only deferred rows will have been removed from selected.
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashMapResultCount = 0;
            int allMatchCount = 0;
            int equalKeySeriesCount = 0;
            int spillCount = 0;
         * Single-Column String specific variables.
            // Batch index of the row whose key bytes serve as the saved key; -1 until one is saved.
            int saveKeyBatchIndex = -1;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < inputLogicalSize; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
           * Single-Column String get key.
                // Implicit -- use batchIndex.
                boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex];
                // A new key series starts when this key is null, no key is saved yet, or the
                // bytes differ from the saved key.
                if (isNull || !haveSaveKey || StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex], vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
                    if (haveSaveKey) {
                        // Move on with our counts.
                        // NOTE(review): the statements for each case are not visible in this
                        // listing; presumably counter updates and breaks -- confirm.
                        switch(saveJoinResult) {
                            case MATCH:
                            case SPILL:
                            case NOMATCH:
                    // A null key is recorded as NOMATCH and clears the saved key.
                    if (isNull) {
                        saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        haveSaveKey = false;
                    } else {
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
               * Single-Column String specific save key.
                        saveKeyBatchIndex = batchIndex;
               * Single-Column String specific lookup key.
                        byte[] keyBytes = vector[batchIndex];
                        int keyStart = start[batchIndex];
                        int keyLength = length[batchIndex];
                        saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]);
                    // Record the first row of the new key series according to its join result.
                    // NOTE(review): no break statements are visible between the cases here;
                    // presumably elided from the listing -- confirm.
                    switch(saveJoinResult) {
                        case MATCH:
                            equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
                            equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
                            equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
                            equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashMapResultCount;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                } else {
                    // Key equals the saved key: reuse saveJoinResult instead of looking it up again.
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashMapResultCount;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                    case SPILL:
                    case NOMATCH:
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) + " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
            finishInner(batch, allMatchCount, equalKeySeriesCount, spillCount, hashMapResultCount);
        if (batch.size > 0) {
            // Forward any remaining selected rows.
    // Both handlers rethrow the failure wrapped in a HiveException.
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
Also used : JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 84 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinLeftSemiLongOperator method process.

// ---------------------------------------------------------------------------
// Process Single-Column Long Left-Semi Join on a vectorized row batch.
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
         * Initialize Single-Column Long members for this specialized class.
            singleJoinColumn = bigTableKeyColumnMap[0];
            needCommonSetup = false;
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
         * Get our Single-Column Long hash set information for this specialized class.
            hashSet = (VectorMapJoinLongHashSet) vectorMapJoinHashTable;
            useMinMax = hashSet.useMinMax();
            if (useMinMax) {
                min = hashSet.min();
                max = hashSet.max();
            needHashTableSetup = false;
        // For left semi joins, we may apply the filter(s) now.
        for (VectorExpression ve : bigTableFilterExpressions) {
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
       * Single-Column Long specific declarations.
        // The one join column for this specialized class.
        LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn];
        long[] vector = joinColVector.vector;
       * Single-Column Long check for repeating.
        // Check single column for repeating.
        boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
        if (allKeyInputColumnsRepeating) {
         * Repeating.
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
         * Single-Column Long specific repeated lookup.
            JoinUtil.JoinResult joinResult;
            if (!joinColVector.noNulls && joinColVector.isNull[0]) {
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                long key = vector[0];
                if (useMinMax && (key < min || key > max)) {
                    // Out of range for whole batch.
                    joinResult = JoinUtil.JoinResult.NOMATCH;
                } else {
                    joinResult = hashSet.contains(key, hashSetResults[0]);
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " +;
            finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            // We remember any matching rows in matchs / matchSize.  At the end of the loop,
            // selected / batch.size will represent both matching and non-matching rows for outer join.
            // Only deferred rows will have been removed from selected.
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashSetResultCount = 0;
            int allMatchCount = 0;
            int spillCount = 0;
         * Single-Column Long specific variables.
            long saveKey = 0;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < inputLogicalSize; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
           * Single-Column Long get key.
                long currentKey;
                boolean isNull;
                if (!joinColVector.noNulls && joinColVector.isNull[batchIndex]) {
                    currentKey = 0;
                    isNull = true;
                } else {
                    currentKey = vector[batchIndex];
                    isNull = false;
                if (isNull || !haveSaveKey || currentKey != saveKey) {
                    if (haveSaveKey) {
                        // Move on with our counts.
                        switch(saveJoinResult) {
                            case MATCH:
                                // We have extracted the existence from the hash set result, so we don't keep it.
                            case SPILL:
                                // We keep the hash set result for its spill information.
                            case NOMATCH:
                    if (isNull) {
                        saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        haveSaveKey = false;
                    } else {
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
               * Single-Column Long specific save key.
                        saveKey = currentKey;
                        if (useMinMax && (currentKey < min || currentKey > max)) {
                            // Key out of range for whole hash table.
                            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        } else {
                            saveJoinResult = hashSet.contains(currentKey, hashSetResults[hashSetResultCount]);
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                } else {
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        // We have extracted the existence from the hash set result, so we don't keep it.
                    case SPILL:
                        // We keep the hash set result for its spill information.
                    case NOMATCH:
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
            finishLeftSemi(batch, allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults);
        if (batch.size > 0) {
            // Forward any remaining selected rows.
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
Also used : VectorMapJoinHashTableResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult) JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 85 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinOuterLongOperator method process.

// ---------------------------------------------------------------------------
// Process Single-Column Long Outer Join on a vectorized row batch.
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
         * Initialize Single-Column Long members for this specialized class.
            singleJoinColumn = bigTableKeyColumnMap[0];
            needCommonSetup = false;
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
         * Get our Single-Column Long hash map information for this specialized class.
            hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable;
            useMinMax = hashMap.useMinMax();
            if (useMinMax) {
                min = hashMap.min();
                max = hashMap.max();
            needHashTableSetup = false;
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
        // Do the per-batch setup for an outer join.
        // For outer join, remember our input rows before ON expression filtering or before
        // hash table matching so we can generate results for all rows (matching and non matching)
        // later.
        boolean inputSelectedInUse = batch.selectedInUse;
        if (inputSelectedInUse) {
            // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
            // throw new HiveException("batch.selected is not in sort order and unique");
            // }
            System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
        // Filtering for outer join just removes rows available for hash table matching.
        boolean someRowsFilteredOut = false;
        if (bigTableFilterExpressions.length > 0) {
            // Since the input
            for (VectorExpression ve : bigTableFilterExpressions) {
            someRowsFilteredOut = (batch.size != inputLogicalSize);
            if (LOG.isDebugEnabled()) {
                if (batch.selectedInUse) {
                    if (inputSelectedInUse) {
                        LOG.debug(CLASS_NAME + " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
                    } else {
                        LOG.debug(CLASS_NAME + " inputLogicalSize " + inputLogicalSize + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
       * Single-Column Long specific declarations.
        // The one join column for this specialized class.
        LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn];
        long[] vector = joinColVector.vector;
       * Single-Column Long check for repeating.
        // Check single column for repeating.
        boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
        if (allKeyInputColumnsRepeating) {
         * Repeating.
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
         * Single-Column Long specific repeated lookup.
            JoinUtil.JoinResult joinResult;
            if (batch.size == 0) {
                // Whole repeated key batch was filtered out.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
                // Any (repeated) null key column is no match for whole batch.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                // Handle *repeated* join key, if found.
                long key = vector[0];
                // LOG.debug(CLASS_NAME + " repeated key " + key);
                if (useMinMax && (key < min || key > max)) {
                    // Out of range for whole batch.
                    joinResult = JoinUtil.JoinResult.NOMATCH;
                } else {
                    joinResult = hashMap.lookup(key, hashMapResults[0]);
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " +;
            finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashMapResultCount = 0;
            int allMatchCount = 0;
            int equalKeySeriesCount = 0;
            int spillCount = 0;
            boolean atLeastOneNonMatch = someRowsFilteredOut;
         * Single-Column Long specific variables.
            long saveKey = 0;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < batch.size; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
           * Single-Column Long outer null detection.
                boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex];
                if (isNull) {
                    // Have that the NULL does not interfere with the current equal key series, if there
                    // is one. We do not set saveJoinResult.
                    // Let a current MATCH equal key series keep going, or
                    // Let a current SPILL equal key series keep going, or
                    // Let a current NOMATCH keep not matching.
                    atLeastOneNonMatch = true;
                // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
                } else {
             * Single-Column Long outer get key.
                    long currentKey = vector[batchIndex];
                    if (!haveSaveKey || currentKey != saveKey) {
                        if (haveSaveKey) {
                            // Move on with our counts.
                            switch(saveJoinResult) {
                                case MATCH:
                                case SPILL:
                                case NOMATCH:
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
               * Single-Column Long specific save key.
                        saveKey = currentKey;
                        if (useMinMax && (currentKey < min || currentKey > max)) {
                            // Key out of range for whole hash table.
                            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        } else {
                            saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]);
                        switch(saveJoinResult) {
                            case MATCH:
                                equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
                                equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
                                equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
                                equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
                                allMatchs[allMatchCount++] = batchIndex;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                            case SPILL:
                                spills[spillCount] = batchIndex;
                                spillHashMapResultIndices[spillCount] = hashMapResultCount;
                            case NOMATCH:
                                atLeastOneNonMatch = true;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                    } else {
                        switch(saveJoinResult) {
                            case MATCH:
                                allMatchs[allMatchCount++] = batchIndex;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                            case SPILL:
                                spills[spillCount] = batchIndex;
                                spillHashMapResultIndices[spillCount] = hashMapResultCount;
                            case NOMATCH:
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
                // throw new HiveException("allMatchs is not in sort order and unique");
                // }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                    case SPILL:
                    case NOMATCH:
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) + " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) + " atLeastOneNonMatch " + atLeastOneNonMatch + " inputSelectedInUse " + inputSelectedInUse + " inputLogicalSize " + inputLogicalSize + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
            // We will generate results for all matching and non-matching rows.
            finishOuter(batch, allMatchCount, equalKeySeriesCount, atLeastOneNonMatch, inputSelectedInUse, inputLogicalSize, spillCount, hashMapResultCount);
        if (batch.size > 0) {
            // Forward any remaining selected rows.
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
Also used : JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)


VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)140 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)57 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)44 ArrayList (java.util.ArrayList)43 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)38 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)32 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)30 Test (org.junit.Test)29 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)27 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)27 IOException ( ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)25 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)25 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)23 FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression)23 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)23 VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)19 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)19 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)19 DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)15