Example 36 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

The evaluate method of the class CastStringToDate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
    if (n == 0) {
        // Nothing to do
        return;
    }
    if (inV.noNulls) {
        outV.noNulls = true;
        if (inV.isRepeating) {
            outV.isRepeating = true;
            evaluate(outV, inV, 0);
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                evaluate(outV, inV, i);
            }
            outV.isRepeating = false;
        } else {
            for (int i = 0; i != n; i++) {
                evaluate(outV, inV, i);
            }
            outV.isRepeating = false;
        }
    } else {
        // Handle case with nulls. Don't do function if the value is null,
        // because the data may be undefined for a null value.
        outV.noNulls = false;
        if (inV.isRepeating) {
            outV.isRepeating = true;
            outV.isNull[0] = inV.isNull[0];
            if (!inV.isNull[0]) {
                evaluate(outV, inV, 0);
            }
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outV.isNull[i] = inV.isNull[i];
                if (!inV.isNull[i]) {
                    evaluate(outV, inV, i);
                }
            }
            outV.isRepeating = false;
        } else {
            System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
            for (int i = 0; i != n; i++) {
                if (!inV.isNull[i]) {
                    evaluate(outV, inV, i);
                }
            }
            outV.isRepeating = false;
        }
    }
}
Also used: BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
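The same pattern can be exercised from a small harness. The sketch below is a minimal driver, assuming CastStringToDate takes an (inputColumn, outputColumn) constructor as other vectorized expressions do; everything else uses only the standard column-vector API, and the output column receives days since the epoch.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDate;

public class CastStringToDateSketch {
    public static void main(String[] args) {
        // Column 0 holds the input strings; column 1 receives days since epoch.
        VectorizedRowBatch batch = new VectorizedRowBatch(2);
        BytesColumnVector in = new BytesColumnVector();
        in.initBuffer();
        LongColumnVector out = new LongColumnVector();
        batch.cols[0] = in;
        batch.cols[1] = out;

        byte[] d0 = "1970-01-02".getBytes(StandardCharsets.UTF_8);
        byte[] d1 = "2000-01-01".getBytes(StandardCharsets.UTF_8);
        in.setVal(0, d0, 0, d0.length);
        in.setVal(1, d1, 0, d1.length);
        in.isNull[2] = true;   // row 2 exercises the null branch above
        in.noNulls = false;
        batch.size = 3;

        // Assumed constructor shape: (inputColumn, outputColumn).
        CastStringToDate cast = new CastStringToDate(0, 1);
        cast.evaluate(batch);

        for (int i = 0; i < batch.size; i++) {
            System.out.println(out.isNull[i] ? "null" : out.vector[i]);
        }
    }
}

Row 0 should print 1 (one day after the epoch) and row 2 should stay null, since the kernel never parses null entries.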

Example 37 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

The evaluate method of the class CastStringToIntervalYearMonth.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
    if (n == 0) {
        // Nothing to do
        return;
    }
    if (inV.noNulls) {
        outV.noNulls = true;
        if (inV.isRepeating) {
            outV.isRepeating = true;
            evaluate(outV, inV, 0);
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                evaluate(outV, inV, i);
            }
            outV.isRepeating = false;
        } else {
            for (int i = 0; i != n; i++) {
                evaluate(outV, inV, i);
            }
            outV.isRepeating = false;
        }
    } else {
        // Handle case with nulls. Don't do function if the value is null,
        // because the data may be undefined for a null value.
        outV.noNulls = false;
        if (inV.isRepeating) {
            outV.isRepeating = true;
            outV.isNull[0] = inV.isNull[0];
            if (!inV.isNull[0]) {
                evaluate(outV, inV, 0);
            }
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outV.isNull[i] = inV.isNull[i];
                if (!inV.isNull[i]) {
                    evaluate(outV, inV, i);
                }
            }
            outV.isRepeating = false;
        } else {
            System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
            for (int i = 0; i != n; i++) {
                if (!inV.isNull[i]) {
                    evaluate(outV, inV, i);
                }
            }
            outV.isRepeating = false;
        }
    }
}
Also used: BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
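Note how both evaluate methods special-case isRepeating by touching only entry 0. A tiny sketch of that convention, using nothing beyond the column-vector API:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;

public class RepeatingVectorSketch {
    public static void main(String[] args) {
        BytesColumnVector v = new BytesColumnVector();
        v.initBuffer();
        byte[] interval = "1-6".getBytes(StandardCharsets.UTF_8); // 1 year, 6 months
        v.setVal(0, interval, 0, interval.length);
        v.isRepeating = true; // entry 0 now stands for every row in the batch

        // A kernel honoring isRepeating parses once instead of once per row,
        // and readers must use entry 0 regardless of any selection vector.
        System.out.println(new String(v.vector[0], v.start[0], v.length[0], StandardCharsets.UTF_8));
    }
}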

Example 38 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

The evaluate method of the class AbstractFilterStringColLikeStringScalar.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (checker == null) {
        checker = createChecker(pattern);
    }
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
    int[] sel = batch.selected;
    boolean[] nullPos = inputColVector.isNull;
    int n = batch.size;
    byte[][] vector = inputColVector.vector;
    int[] length = inputColVector.length;
    int[] start = inputColVector.start;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    if (inputColVector.noNulls) {
        if (inputColVector.isRepeating) {
            // All must be selected otherwise size would be zero. Repeating property will not change.
            if (!checker.check(vector[0], start[0], length[0])) {
                // Entire batch is filtered out.
                batch.size = 0;
            }
        } else if (batch.selectedInUse) {
            int newSize = 0;
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (checker.check(vector[i], start[i], length[i])) {
                    sel[newSize++] = i;
                }
            }
            batch.size = newSize;
        } else {
            int newSize = 0;
            for (int i = 0; i != n; i++) {
                if (checker.check(vector[i], start[i], length[i])) {
                    sel[newSize++] = i;
                }
            }
            if (newSize < n) {
                batch.size = newSize;
                batch.selectedInUse = true;
            }
        }
    } else {
        if (inputColVector.isRepeating) {
            // All must be selected otherwise size would be zero. Repeating property will not change.
            if (!nullPos[0]) {
                if (!checker.check(vector[0], start[0], length[0])) {
                    // Entire batch is filtered out.
                    batch.size = 0;
                }
            } else {
                batch.size = 0;
            }
        } else if (batch.selectedInUse) {
            int newSize = 0;
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (!nullPos[i]) {
                    if (checker.check(vector[i], start[i], length[i])) {
                        sel[newSize++] = i;
                    }
                }
            }
            // Change the selected vector.
            batch.size = newSize;
        } else {
            int newSize = 0;
            for (int i = 0; i != n; i++) {
                if (!nullPos[i]) {
                    if (checker.check(vector[i], start[i], length[i])) {
                        sel[newSize++] = i;
                    }
                }
            }
            if (newSize < n) {
                batch.size = newSize;
                batch.selectedInUse = true;
            }
        /* If every row qualified (newSize==n), then we can ignore the sel vector to streamline
         * future operations. So selectedInUse will remain false.
         */
        }
    }
}
Also used: BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)
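Unlike the cast kernels, filters produce no output column; they shrink the batch in place by rewriting batch.selected. The following standalone sketch shows just that compaction idiom, with a hypothetical first-byte predicate standing in for Hive's LIKE checker (repeating and null handling omitted for brevity):

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class FilterCompactionSketch {
    /** Keep only rows whose string value starts with the given byte. */
    static void filterFirstByte(VectorizedRowBatch batch, int col, byte wanted) {
        BytesColumnVector in = (BytesColumnVector) batch.cols[col];
        int newSize = 0;
        for (int i = 0; i < batch.size; i++) {
            int row = batch.selectedInUse ? batch.selected[i] : i;
            if (in.length[row] > 0 && in.vector[row][in.start[row]] == wanted) {
                batch.selected[newSize++] = row;
            }
        }
        // If no selection was in use and every row qualified, leave
        // selectedInUse false so later operators can skip the indirection.
        if (batch.selectedInUse || newSize < batch.size) {
            batch.size = newSize;
            batch.selectedInUse = true;
        }
    }
}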

Example 39 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

The process method of the class VectorMapJoinLeftSemiStringOperator.

//---------------------------------------------------------------------------
// Process Single-Column String Left-Semi Join on a vectorized row batch.
//
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
            commonSetup(batch);
            /*
             * Initialize Single-Column String members for this specialized class.
             */
            singleJoinColumn = bigTableKeyColumnMap[0];
            needCommonSetup = false;
        }
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
            /*
             * Get our Single-Column String hash set information for this specialized class.
             */
            hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable;
            needHashTableSetup = false;
        }
        batchCounter++;
        // For left semi joins, we may apply the filter(s) now.
        for (VectorExpression ve : bigTableFilterExpressions) {
            ve.evaluate(batch);
        }
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (isLogDebugEnabled) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        /*
         * Single-Column String specific declarations.
         */
        // The one join column for this specialized class.
        BytesColumnVector joinColVector = (BytesColumnVector) batch.cols[singleJoinColumn];
        byte[][] vector = joinColVector.vector;
        int[] start = joinColVector.start;
        int[] length = joinColVector.length;
        /*
         * Single-Column String check for repeating.
         */
        // Check single column for repeating.
        boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
        if (allKeyInputColumnsRepeating) {
            /*
             * Repeating.
             */
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
            /*
             * Single-Column String specific repeated lookup.
             */
            JoinUtil.JoinResult joinResult;
            if (!joinColVector.noNulls && joinColVector.isNull[0]) {
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                byte[] keyBytes = vector[0];
                int keyStart = start[0];
                int keyLength = length[0];
                joinResult = hashSet.contains(keyBytes, keyStart, keyLength, hashSetResults[0]);
            }
            if (isLogDebugEnabled) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
            }
            finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
        } else {
            if (isLogDebugEnabled) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            }
            // We remember any matching rows in allMatchs / allMatchCount.  At the end of the loop,
            // selected / batch.size will represent both matching and non-matching rows.
            // Only deferred rows will have been removed from selected.
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashSetResultCount = 0;
            int allMatchCount = 0;
            int spillCount = 0;
            /*
             * Single-Column String specific variables.
             */
            int saveKeyBatchIndex = -1;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < inputLogicalSize; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                /*
                 * Single-Column String get key.
                 */
                // Implicit -- use batchIndex.
                boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex];
                if (isNull || !haveSaveKey || !StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex], vector[batchIndex], start[batchIndex], length[batchIndex])) {
                    if (haveSaveKey) {
                        // Move on with our counts.
                        switch(saveJoinResult) {
                            case MATCH:
                                // We have extracted the existence from the hash set result, so we don't keep it.
                                break;
                            case SPILL:
                                // We keep the hash set result for its spill information.
                                hashSetResultCount++;
                                break;
                            case NOMATCH:
                                break;
                        }
                    }
                    if (isNull) {
                        saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        haveSaveKey = false;
                    } else {
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
                        /*
                         * Single-Column String specific save key and lookup.
                         */
                        saveKeyBatchIndex = batchIndex;
                        /*
                         * Single-Column String specific lookup key.
                         */
                        byte[] keyBytes = vector[batchIndex];
                        int keyStart = start[batchIndex];
                        int keyLength = length[batchIndex];
                        saveJoinResult = hashSet.contains(keyBytes, keyStart, keyLength, hashSetResults[hashSetResultCount]);
                    }
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                            break;
                    }
                } else {
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                            break;
                    }
                }
            }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        // We have extracted the existence from the hash set result, so we don't keep it.
                        break;
                    case SPILL:
                        // We keep the hash set result for its spill information.
                        hashSetResultCount++;
                        break;
                    case NOMATCH:
                        break;
                }
            }
            if (isLogDebugEnabled) {
                LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
            }
            finishLeftSemi(batch, allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults);
        }
        if (batch.size > 0) {
            // Forward any remaining selected rows.
            forwardBigTableBatch(batch);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used: VectorMapJoinHashTableResult (org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult), JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), IOException (java.io.IOException), VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)
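The heart of the operator is the run-of-equal-keys optimization: a hash lookup is issued only when StringExpr.equal reports that the current key differs from the saved one. A condensed sketch of just that run detection, stripped of the join machinery (it assumes a non-repeating key vector):

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;

public class KeyRunSketch {
    /** Count distinct runs of equal, non-null keys in row order. */
    static int countKeyRuns(BytesColumnVector keys, int size) {
        int runs = 0;
        int save = -1;                 // batch index of the current run's key
        for (int i = 0; i < size; i++) {
            boolean isNull = !keys.noNulls && keys.isNull[i];
            if (isNull) {
                save = -1;             // a null breaks the run
            } else if (save < 0 || !StringExpr.equal(
                    keys.vector[save], keys.start[save], keys.length[save],
                    keys.vector[i], keys.start[i], keys.length[i])) {
                runs++;                // new key: this is where a lookup would go
                save = i;
            }
        }
        return runs;
    }
}

For a batch of n rows with r runs, the operator issues r hash-set probes instead of n, which is why clustered or sorted key columns are cheap here.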

Example 40 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

The decodeDictionaryIds method of the class VectorizedPrimitiveColumnReader.

/**
   * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
   */
private void decodeDictionaryIds(int rowId, int num, ColumnVector column, LongColumnVector dictionaryIds) {
    System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
    if (column.noNulls) {
        column.noNulls = dictionaryIds.noNulls;
    }
    column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
    switch(descriptor.getType()) {
        case INT32:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.decodeToInt((int) dictionaryIds.vector[i]);
            }
            break;
        case INT64:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.decodeToLong((int) dictionaryIds.vector[i]);
            }
            break;
        case FLOAT:
            for (int i = rowId; i < rowId + num; ++i) {
                ((DoubleColumnVector) column).vector[i] = dictionary.decodeToFloat((int) dictionaryIds.vector[i]);
            }
            break;
        case DOUBLE:
            for (int i = rowId; i < rowId + num; ++i) {
                ((DoubleColumnVector) column).vector[i] = dictionary.decodeToDouble((int) dictionaryIds.vector[i]);
            }
            break;
        case INT96:
            final Calendar calendar;
            if (Strings.isNullOrEmpty(this.conversionTimeZone)) {
                // Local time should be used if no timezone is specified
                calendar = Calendar.getInstance();
            } else {
                calendar = Calendar.getInstance(TimeZone.getTimeZone(this.conversionTimeZone));
            }
            for (int i = rowId; i < rowId + num; ++i) {
                ByteBuffer buf = dictionary.decodeToBinary((int) dictionaryIds.vector[i]).toByteBuffer();
                buf.order(ByteOrder.LITTLE_ENDIAN);
                long timeOfDayNanos = buf.getLong();
                int julianDay = buf.getInt();
                NanoTime nt = new NanoTime(julianDay, timeOfDayNanos);
                Timestamp ts = NanoTimeUtils.getTimestamp(nt, calendar);
                ((TimestampColumnVector) column).set(i, ts);
            }
            break;
        case BINARY:
        case FIXED_LEN_BYTE_ARRAY:
            if (column instanceof BytesColumnVector) {
                for (int i = rowId; i < rowId + num; ++i) {
                    ((BytesColumnVector) column).setVal(i, dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe());
                }
            } else {
                DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column);
                decimalColumnVector.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision();
                decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale();
                for (int i = rowId; i < rowId + num; ++i) {
                    decimalColumnVector.vector[i].set(dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe(), decimalColumnVector.scale);
                }
            }
            break;
        default:
            throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType());
    }
}
Also used: NanoTime (org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime), TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector), DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector), DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), Calendar (java.util.Calendar), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), ByteBuffer (java.nio.ByteBuffer), Timestamp (java.sql.Timestamp), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
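The BINARY branch resolves each dictionary id to its bytes and copies them into the output with setVal. A self-contained sketch of the same decode step over a plain byte[][] dictionary (standing in for the Parquet Dictionary object):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class DictionaryDecodeSketch {
    /** Materialize dictionary-encoded binary values into a BytesColumnVector. */
    static void decode(byte[][] dictionary, LongColumnVector ids,
                       BytesColumnVector out, int rowId, int num) {
        for (int i = rowId; i < rowId + num; ++i) {
            byte[] bytes = dictionary[(int) ids.vector[i]];
            // setVal copies into the vector's shared buffer, so the
            // dictionary entries may be reused or mutated afterwards.
            out.setVal(i, bytes, 0, bytes.length);
        }
    }

    public static void main(String[] args) {
        byte[][] dict = { "alpha".getBytes(StandardCharsets.UTF_8),
                          "beta".getBytes(StandardCharsets.UTF_8) };
        LongColumnVector ids = new LongColumnVector();
        ids.vector[0] = 1; ids.vector[1] = 0; ids.vector[2] = 1;
        BytesColumnVector out = new BytesColumnVector();
        out.initBuffer();
        decode(dict, ids, out, 0, 3);
        for (int i = 0; i < 3; i++) {
            System.out.println(new String(out.vector[i], out.start[i], out.length[i],
                    StandardCharsets.UTF_8));
        }
    }
}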

Aggregations

BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 124 uses
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 66 uses
Test (org.junit.Test): 50 uses
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 44 uses
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 12 uses
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 10 uses
DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 8 uses
TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector): 8 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8 uses
Text (org.apache.hadoop.io.Text): 8 uses
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 6 uses
IOException (java.io.IOException): 4 uses
ArrayList (java.util.ArrayList): 4 uses
Path (org.apache.hadoop.fs.Path): 4 uses
JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil): 4 uses
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 4 uses
TypeDescription (org.apache.orc.TypeDescription): 4 uses
UnsupportedEncodingException (java.io.UnsupportedEncodingException): 3 uses
ParseException (java.text.ParseException): 3 uses
Random (java.util.Random): 3 uses