
Example 51 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class StringScalarConcatStringGroupCol, method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    byte[][] vector = inputColVector.vector;
    int[] start = inputColVector.start;
    int[] length = inputColVector.length;
    if (n == 0) {
        // Nothing to do
        return;
    }
    // initialize output vector buffer to receive data
    outV.initBuffer();
    if (inputColVector.noNulls) {
        outV.noNulls = true;
        if (inputColVector.isRepeating) {
            outV.isRepeating = true;
            outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]);
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
            }
            outV.isRepeating = false;
        } else {
            for (int i = 0; i != n; i++) {
                outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
            }
            outV.isRepeating = false;
        }
    } else {
        /*
         * Handle the case with nulls. Don't apply the function if the value is null,
         * to save time, because calling the function can be expensive.
         */
        outV.noNulls = false;
        if (inputColVector.isRepeating) {
            outV.isRepeating = true;
            outV.isNull[0] = inputColVector.isNull[0];
            if (!inputColVector.isNull[0]) {
                outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]);
            }
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (!inputColVector.isNull[i]) {
                    outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
                }
                outV.isNull[i] = inputColVector.isNull[i];
            }
            outV.isRepeating = false;
        } else {
            for (int i = 0; i != n; i++) {
                if (!inputColVector.isNull[i]) {
                    outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
                }
                outV.isNull[i] = inputColVector.isNull[i];
            }
            outV.isRepeating = false;
        }
    }
}
Also used : BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)
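
Note (not part of the Hive source above): evaluate() expects the caller to have attached BytesColumnVector instances to the batch and to have passed the constant bytes into the expression's value field. Below is a minimal, hedged sketch of that setup and of the setConcat() call the loop performs per row; the column indexes 0 (input) and 1 (output) and the class name ConcatBatchSketch are made up for illustration.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class ConcatBatchSketch {
    public static void main(String[] args) {
        // Two columns: index 0 is the string input, index 1 receives the concat result.
        VectorizedRowBatch batch = new VectorizedRowBatch(2);
        BytesColumnVector in = new BytesColumnVector();
        BytesColumnVector out = new BytesColumnVector();
        batch.cols[0] = in;
        batch.cols[1] = out;
        batch.size = 1;

        byte[] scalar = "hello ".getBytes(StandardCharsets.UTF_8);
        byte[] col = "world".getBytes(StandardCharsets.UTF_8);
        // Point row 0 of the input column at "world" by reference (no copy).
        in.setRef(0, col, 0, col.length);
        in.noNulls = true;
        in.isRepeating = false;

        // The per-row operation the evaluate() loop above performs:
        out.initBuffer();
        out.setConcat(0, scalar, 0, scalar.length, col, 0, col.length);

        System.out.println(new String(out.vector[0], out.start[0], out.length[0],
                StandardCharsets.UTF_8)); // prints "hello world"
    }
}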

Example 52 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class StringSubstrColStart, method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    BytesColumnVector inV = (BytesColumnVector) batch.cols[colNum];
    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
    int n = batch.size;
    if (n == 0) {
        return;
    }
    byte[][] vector = inV.vector;
    int[] sel = batch.selected;
    int[] len = inV.length;
    int[] start = inV.start;
    outV.initBuffer();
    if (inV.isRepeating) {
        outV.isRepeating = true;
        if (!inV.noNulls && inV.isNull[0]) {
            outV.isNull[0] = true;
            outV.noNulls = false;
            outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
            return;
        } else {
            outV.noNulls = true;
            int offset = getSubstrStartOffset(vector[0], start[0], len[0], startIdx);
            if (offset != -1) {
                outV.setVal(0, vector[0], offset, len[0] - (offset - start[0]));
            } else {
                outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
            }
        }
    } else {
        outV.isRepeating = false;
        if (batch.selectedInUse) {
            if (!inV.noNulls) {
                outV.noNulls = false;
                for (int i = 0; i != n; ++i) {
                    int selected = sel[i];
                    if (!inV.isNull[selected]) {
                        int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], startIdx);
                        outV.isNull[selected] = false;
                        if (offset != -1) {
                            outV.setVal(selected, vector[selected], offset, len[selected] - (offset - start[selected]));
                        } else {
                            outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
                        }
                    } else {
                        outV.isNull[selected] = true;
                    }
                }
            } else {
                outV.noNulls = true;
                for (int i = 0; i != n; ++i) {
                    int selected = sel[i];
                    int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], startIdx);
                    if (offset != -1) {
                        outV.setVal(selected, vector[selected], offset, len[selected] - (offset - start[selected]));
                    } else {
                        outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
                    }
                }
            }
        } else {
            if (!inV.noNulls) {
                outV.noNulls = false;
                System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
                for (int i = 0; i != n; ++i) {
                    if (!inV.isNull[i]) {
                        int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx);
                        if (offset != -1) {
                            outV.setVal(i, vector[i], offset, len[i] - (offset - start[i]));
                        } else {
                            outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
                        }
                    }
                }
            } else {
                outV.noNulls = true;
                for (int i = 0; i != n; ++i) {
                    int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx);
                    if (offset != -1) {
                        outV.setVal(i, vector[i], offset, len[i] - (offset - start[i]));
                    } else {
                        outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
                    }
                }
            }
        }
    }
}
Also used : BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)
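
Note (not part of the Hive source above): getSubstrStartOffset() is a private helper of StringSubstrColStart that maps the SUBSTR start position to a byte offset inside the row's UTF-8 bytes, returning -1 when the position falls outside the string. The following is a simplified, hypothetical stand-in that illustrates the idea (1-based positions from the front, negative positions counted from the end); the real Hive helper may differ in edge cases.

/**
 * Simplified illustration only, not the Hive implementation. Counts UTF-8 characters
 * (bytes that are not 10xxxxxx continuation bytes) and returns the byte offset of the
 * requested character, or -1 if the position is out of range.
 */
static int substrStartOffsetSketch(byte[] buf, int start, int len, int substrStart) {
    int charCount = 0;
    for (int i = start; i < start + len; i++) {
        if ((buf[i] & 0xC0) != 0x80) {
            charCount++;
        }
    }
    // Normalize: positive positions are 1-based, negative positions count from the end.
    int target = substrStart > 0 ? substrStart - 1
            : substrStart < 0 ? charCount + substrStart
            : 0;
    if (target < 0 || target >= charCount) {
        return -1;
    }
    int seen = 0;
    for (int i = start; i < start + len; i++) {
        if ((buf[i] & 0xC0) != 0x80) {
            if (seen == target) {
                return i;
            }
            seen++;
        }
    }
    return -1;
}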

Example 53 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class StringUnaryUDF, method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
    int[] sel = batch.selected;
    int n = batch.size;
    byte[][] vector = inputColVector.vector;
    int[] start = inputColVector.start;
    int[] length = inputColVector.length;
    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
    outV.initBuffer();
    Text t;
    if (n == 0) {
        // Nothing to do
        return;
    }
    if (inputColVector.noNulls) {
        outV.noNulls = true;
        if (inputColVector.isRepeating) {
            outV.isRepeating = true;
            s.set(vector[0], start[0], length[0]);
            t = func.evaluate(s);
            setString(outV, 0, t);
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                /*
                 * Fill output isNull with false for selected elements since there is a chance
                 * we'll convert to noNulls == false in setString().
                 */
                outV.isNull[i] = false;
                s.set(vector[i], start[i], length[i]);
                t = func.evaluate(s);
                setString(outV, i, t);
            }
            outV.isRepeating = false;
        } else {
            // Set all elements to not null. The setString call can override this.
            Arrays.fill(outV.isNull, 0, n, false);
            for (int i = 0; i != n; i++) {
                s.set(vector[i], start[i], length[i]);
                t = func.evaluate(s);
                setString(outV, i, t);
            }
            outV.isRepeating = false;
        }
    } else {
        // Handle case with nulls. Don't do function if the value is null, to save time,
        // because calling the function can be expensive.
        outV.noNulls = false;
        if (inputColVector.isRepeating) {
            outV.isRepeating = true;
            // setString can override this
            outV.isNull[0] = inputColVector.isNull[0];
            if (!inputColVector.isNull[0]) {
                s.set(vector[0], start[0], length[0]);
                t = func.evaluate(s);
                setString(outV, 0, t);
            }
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                // setString can override this
                outV.isNull[i] = inputColVector.isNull[i];
                if (!inputColVector.isNull[i]) {
                    s.set(vector[i], start[i], length[i]);
                    t = func.evaluate(s);
                    setString(outV, i, t);
                }
            }
            outV.isRepeating = false;
        } else {
            // setString can override this null propagation
            System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
            for (int i = 0; i != n; i++) {
                if (!inputColVector.isNull[i]) {
                    s.set(vector[i], start[i], length[i]);
                    t = func.evaluate(s);
                    setString(outV, i, t);
                }
            }
            outV.isRepeating = false;
        }
    }
}
Also used : BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Text(org.apache.hadoop.io.Text)
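
Note (not part of the Hive source above): the key pattern here is that a single Text object (the field s) is re-pointed at each row's bytes with Text.set(byte[], int, int), so no per-row copy of the input is made, and setString() either copies the UDF result into the output vector or marks the row null. A standalone sketch of that pattern, with a made-up upper-casing lambda standing in for the expression's actual UDF:

import java.nio.charset.StandardCharsets;
import java.util.function.UnaryOperator;

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.io.Text;

public class TextReuseSketch {
    public static void main(String[] args) {
        // Stand-in for the real string UDF; may return null for some inputs.
        UnaryOperator<Text> udf = t -> new Text(t.toString().toUpperCase());

        byte[] row = "hello".getBytes(StandardCharsets.UTF_8);
        BytesColumnVector outV = new BytesColumnVector();
        outV.initBuffer();

        Text scratch = new Text();           // reused across rows, like the field s above
        scratch.set(row, 0, row.length);     // points at the row bytes, no copy
        Text result = udf.apply(scratch);

        if (result == null) {                // the case setString() guards against
            outV.isNull[0] = true;
            outV.noNulls = false;
        } else {
            outV.setVal(0, result.getBytes(), 0, result.getLength());
            System.out.println(new String(outV.vector[0], outV.start[0], outV.length[0],
                    StandardCharsets.UTF_8)); // prints "HELLO"
        }
    }
}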

Example 54 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class StringUnaryUDFDirect, method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    byte[][] vector = inputColVector.vector;
    int[] start = inputColVector.start;
    int[] length = inputColVector.length;
    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
    outV.initBuffer();
    if (n == 0) {
        // Nothing to do
        return;
    }
    if (inputColVector.noNulls) {
        outV.noNulls = true;
        if (inputColVector.isRepeating) {
            outV.isRepeating = true;
            func(outV, vector, start, length, 0);
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                func(outV, vector, start, length, i);
            }
            outV.isRepeating = false;
        } else {
            for (int i = 0; i != n; i++) {
                func(outV, vector, start, length, i);
            }
            outV.isRepeating = false;
        }
    } else {
        // Handle case with nulls. Don't do function if the value is null,
        // because the data may be undefined for a null value.
        outV.noNulls = false;
        if (inputColVector.isRepeating) {
            outV.isRepeating = true;
            outV.isNull[0] = inputColVector.isNull[0];
            if (!inputColVector.isNull[0]) {
                func(outV, vector, start, length, 0);
            }
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outV.isNull[i] = inputColVector.isNull[i];
                if (!inputColVector.isNull[i]) {
                    func(outV, vector, start, length, i);
                }
            }
            outV.isRepeating = false;
        } else {
            System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
            for (int i = 0; i != n; i++) {
                if (!inputColVector.isNull[i]) {
                    func(outV, vector, start, length, i);
                }
            }
            outV.isRepeating = false;
        }
    }
}
Also used : BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)
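
Note (not part of the Hive source above): func(...) is the abstract per-row hook that concrete subclasses of StringUnaryUDFDirect implement. A hypothetical override in that shape, lower-casing ASCII bytes purely for illustration; it reads row i from the parallel vector/start/length arrays and stores the result with setVal() (evaluate() has already called outV.initBuffer()):

// Hypothetical subclass body, shown only to illustrate the contract of func().
protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) {
    byte[] src = vector[i];
    byte[] dst = new byte[length[i]];
    for (int k = 0; k < length[i]; k++) {
        byte b = src[start[i] + k];
        dst[k] = (b >= 'A' && b <= 'Z') ? (byte) (b + 32) : b; // ASCII lower-case
    }
    outV.setVal(i, dst, 0, dst.length);
}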

Example 55 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class VectorUDFDateDiffScalarCol, method evaluateString.

protected void evaluateString(ColumnVector columnVector, LongColumnVector output, int i) {
    BytesColumnVector bcv = (BytesColumnVector) columnVector;
    text.set(bcv.vector[i], bcv.start[i], bcv.length[i]);
    try {
        date.setTime(formatter.parse(text.toString()).getTime());
        output.vector[i] = baseDate - DateWritable.dateToDays(date);
    } catch (ParseException e) {
        output.vector[i] = 1;
        output.isNull[i] = true;
    }
}
Also used : BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) ParseException(java.text.ParseException)
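
Note (not part of the Hive source above): the arithmetic in evaluateString() reduces both dates to days since the epoch and subtracts the column value from the scalar baseDate, marking the output row null when the string cannot be parsed. An equivalent standalone sketch using java.time instead of the SimpleDateFormat/DateWritable path; the literal dates are made up for illustration:

import java.time.LocalDate;
import java.time.format.DateTimeParseException;

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class DateDiffSketch {
    public static void main(String[] args) {
        long baseDate = LocalDate.parse("2020-01-10").toEpochDay(); // scalar side, in epoch days
        LongColumnVector output = new LongColumnVector();

        String row0 = "2020-01-03"; // column side, row 0
        try {
            output.vector[0] = baseDate - LocalDate.parse(row0).toEpochDay();
        } catch (DateTimeParseException e) {
            // Unparseable input becomes a null output row, as in the Hive code above.
            output.noNulls = false;
            output.isNull[0] = true;
        }
        System.out.println(output.vector[0]); // prints 7
    }
}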

Aggregations (classes that co-occur with BytesColumnVector, with usage counts)

BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 124
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 66
Test (org.junit.Test): 50
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 44
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 12
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 10
DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 8
TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector): 8
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8
Text (org.apache.hadoop.io.Text): 8
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 6
IOException (java.io.IOException): 4
ArrayList (java.util.ArrayList): 4
Path (org.apache.hadoop.fs.Path): 4
JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil): 4
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 4
TypeDescription (org.apache.orc.TypeDescription): 4
UnsupportedEncodingException (java.io.UnsupportedEncodingException): 3
ParseException (java.text.ParseException): 3
Random (java.util.Random): 3