Search in sources :

Example 1 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

In class VectorizedOrcSerde, method serialize:

/**
 * Copies every row of the given batch into a per-row {@code OrcStruct} and
 * exposes the rows through the shared {@code ow} writable.
 *
 * <p>For each of the first {@code batch.size} slots, the struct cached in
 * {@code orcStructArray} is reused (or allocated on first use), filled through
 * the {@code valueWriters} for each projected column, and attached to the
 * matching {@code OrcSerdeRow} in {@code orcRowArray} together with the
 * supplied inspector.
 *
 * @param obj the batch to serialize; cast to {@code VectorizedRowBatch}
 * @param inspector inspector stored on every produced row
 * @return the {@code ow} field after it has been set to {@code orcRowArray}
 * @throws RuntimeException wrapping any {@code HiveException} raised while
 *     writing column values
 */
@Override
public Writable serialize(Object obj, ObjectInspector inspector) {
    final VectorizedRowBatch batch = (VectorizedRowBatch) obj;
    try {
        for (int slot = 0; slot < batch.size; slot++) {
            // Reuse the struct cached for this slot, allocating it on first use.
            OrcStruct struct = orcStructArray[slot];
            if (struct == null) {
                struct = new OrcStruct(batch.numCols);
                orcStructArray[slot] = struct;
            }

            // When a selection vector is in use, the slot maps to the selected row.
            final int rowIndex = batch.selectedInUse ? batch.selected[slot] : slot;

            for (int proj = 0; proj < batch.projectionSize; proj++) {
                final int colNum = batch.projectedColumns[proj];
                // A repeating column is always read from entry 0.
                final int sourceRow = batch.cols[colNum].isRepeating ? 0 : rowIndex;
                valueWriters[proj].setValue(struct, batch.cols[colNum], sourceRow);
            }

            final OrcSerdeRow serdeRow = (OrcSerdeRow) orcRowArray[slot];
            serdeRow.realRow = struct;
            serdeRow.inspector = inspector;
        }
    } catch (HiveException ex) {
        throw new RuntimeException(ex);
    }
    ow.set(orcRowArray);
    return ow;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)

Example 2 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

In class SparkReduceRecordHandler, method processVectors:

/**
 * Loads the current key and all remaining values for one tag into that tag's
 * row batch, handing the batch to the reducer each time it fills up and once
 * more for any trailing partial batch.
 *
 * @param values iterator over the values to process; each element is cast to
 *     {@code BytesWritable} before being deserialized
 * @param tag selects the batch and the value struct inspector to use
 * @return true if it is not done and can take more inputs (this method always
 *     returns true when it completes normally)
 * @throws HiveException if the key cannot be added to the batch, or wrapping
 *     any exception raised while values are deserialized or processed
 */
private <E> boolean processVectors(Iterator<E> values, byte tag) throws HiveException {
    final VectorizedRowBatch batch = batches[tag];
    batch.reset();
    buffer.reset();

    /* deserialize key into columns; it occupies row 0 and is marked repeating */
    VectorizedBatchUtil.addRowToBatchFrom(keyObject, keyStructInspector, 0, 0, batch, buffer);
    int keyColumn = 0;
    while (keyColumn < keysColumnOffset) {
        VectorizedBatchUtil.setRepeatingColumn(batch, keyColumn);
        keyColumn++;
    }

    // Number of value rows written to the batch since it was last handed off.
    int pendingRows = 0;
    try {
        while (values.hasNext()) {
            /* deserialize value into columns */
            Object valueObj = deserializeValue((BytesWritable) values.next(), tag);
            VectorizedBatchUtil.addRowToBatchFrom(valueObj, valueStructInspectors[tag], pendingRows, keysColumnOffset, batch, buffer);
            pendingRows++;

            if (pendingRows < BATCH_SIZE) {
                continue;
            }

            // Batch is full: hand it to the reducer and start refilling from row 0.
            VectorizedBatchUtil.setBatchSize(batch, pendingRows);
            reducer.process(batch, tag);
            pendingRows = 0;
            if (isLogInfoEnabled) {
                logMemoryInfo();
            }
        }

        // Flush whatever is left over after the last full batch.
        if (pendingRows > 0) {
            VectorizedBatchUtil.setBatchSize(batch, pendingRows);
            reducer.process(batch, tag);
        }
        if (isLogInfoEnabled) {
            logMemoryInfo();
        }
    } catch (Exception e) {
        // Try to include the batch contents in the error; fall back to a note if that fails too.
        String batchDescription;
        try {
            batchDescription = batch.toString();
        } catch (Exception e2) {
            batchDescription = "[Error getting row data with exception " + StringUtils.stringifyException(e2) + " ]";
        }
        throw new HiveException("Error while processing vector batch (tag=" + tag + ") " + batchDescription, e);
    }
    // give me more
    return true;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BytesWritable(org.apache.hadoop.io.BytesWritable) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)

Example 3 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

In class TestVectorGenericDateExpressions, method testDateAddColCol:

/**
 * Runs the column/column date-add validation three times: on fresh random
 * data, after random nulls are added to the date column, and after random
 * nulls are added to the days column as well.
 *
 * @param colType1 type the date column is cast to before being placed in the batch
 * @param isPositive forwarded unchanged to {@code validateDateAdd}
 */
private void testDateAddColCol(VectorExpression.Type colType1, boolean isPositive) {
    final LongColumnVector dates = newRandomLongColumnVector(10000, size);
    final LongColumnVector dayCounts = newRandomLongColumnVector(1000, size);

    // Batch layout: [0] date input, [1] days input, [2] output.
    final ColumnVector typedDates = castTo(dates, colType1);
    final VectorizedRowBatch batch = new VectorizedRowBatch(3, size);
    batch.cols[0] = typedDates;
    batch.cols[1] = dayCounts;
    batch.cols[2] = new LongColumnVector(size);
    validateDateAdd(batch, dates, dayCounts, colType1, isPositive);

    // Nulls in the date column only; re-cast so the batch sees the updated vector.
    TestVectorizedRowBatch.addRandomNulls(dates);
    batch.cols[0] = castTo(dates, colType1);
    validateDateAdd(batch, dates, dayCounts, colType1, isPositive);

    // Nulls in the days column too.
    TestVectorizedRowBatch.addRandomNulls(dayCounts);
    batch.cols[1] = dayCounts;
    validateDateAdd(batch, dates, dayCounts, colType1, isPositive);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector)

Example 4 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

In class TestVectorGenericDateExpressions, method testDateAddColScalar:

/**
 * Runs the column/scalar date-add validation twice: on fresh random data and
 * again after random nulls are added to the batch's input column.
 *
 * @param colType1 type the date column is cast to before being placed in the batch
 * @param isPositive forwarded unchanged to {@code validateDateAdd}
 */
private void testDateAddColScalar(VectorExpression.Type colType1, boolean isPositive) {
    final LongColumnVector dates = newRandomLongColumnVector(10000, size);
    final ColumnVector typedDates = castTo(dates, colType1);
    final long dayScalar = newRandom(1000);

    // Batch layout: [0] date input, [1] output.
    final VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
    batch.cols[0] = typedDates;
    batch.cols[1] = new LongColumnVector(size);
    validateDateAdd(batch, colType1, dayScalar, isPositive, dates);

    // Nulls are added to the column held by the batch, not to the source vector.
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    validateDateAdd(batch, colType1, dayScalar, isPositive, dates);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector)

Example 5 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

In class TestVectorGenericDateExpressions, method testDateAddScalarCol:

/**
 * Runs the scalar/column date-add validation twice: on fresh random data and
 * again after random nulls are added to the input column.
 *
 * @param colType1 forwarded unchanged to {@code validateDateAdd}
 * @param isPositive forwarded unchanged to {@code validateDateAdd}
 */
private void testDateAddScalarCol(VectorExpression.Type colType1, boolean isPositive) {
    final LongColumnVector inputColumn = newRandomLongColumnVector(10000, size);
    final long scalarValue = newRandom(1000);

    // Batch layout: [0] column input, [1] output.
    final VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
    batch.cols[0] = inputColumn;
    batch.cols[1] = new LongColumnVector(size);
    validateDateAdd(batch, scalarValue, inputColumn, colType1, isPositive);

    // Second pass with random nulls in the input column.
    TestVectorizedRowBatch.addRandomNulls(inputColumn);
    batch.cols[0] = inputColumn;
    validateDateAdd(batch, scalarValue, inputColumn, colType1, isPositive);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Aggregations

VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 401; Test (org.junit.Test): 214; LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 157; BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 98; TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 83; DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 64; DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 40; TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector): 32; VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 30; HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 28; VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader): 26; Configuration (org.apache.hadoop.conf.Configuration): 23; IOException (java.io.IOException): 20; HiveConf (org.apache.hadoop.hive.conf.HiveConf): 20; VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow): 19; HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 18; VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext): 18; Timestamp (java.sql.Timestamp): 17; VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor): 16; VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx): 15