Search in sources :

Example 96 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

The following example is taken from the class VectorizedColumnReaderTestBase, method structRead.

/**
 * Reads a Parquet file with a single struct&lt;a:int,b:double&gt; column through the
 * vectorized reader and verifies every row against the expected generated values.
 *
 * @param isDictionaryEncoding whether the test data was written dictionary-encoded
 * @throws Exception on reader setup or I/O failure
 */
protected void structRead(boolean isDictionaryEncoding) throws Exception {
    Configuration conf = new Configuration();
    conf.set(IOConstants.COLUMNS, "struct_field");
    conf.set(IOConstants.COLUMNS_TYPES, "struct<a:int,b:double>");
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
    // Parquet schema for a group with one int32 and one double field.
    String schema = "message hive_schema {\n"
        + "group struct_field {\n"
        + "  optional int32 a;\n"
        + "  optional double b;\n"
        + "}\n"
        + "}\n";
    VectorizedParquetRecordReader reader = createTestParquetReader(schema, conf);
    VectorizedRowBatch batch = reader.createValue();
    int rowCount = 0;
    try {
        while (reader.next(NullWritable.get(), batch)) {
            StructColumnVector structVector = (StructColumnVector) batch.cols[0];
            LongColumnVector aField = (LongColumnVector) structVector.fields[0];
            DoubleColumnVector bField = (DoubleColumnVector) structVector.fields[1];
            for (int idx = 0; idx < aField.vector.length; idx++) {
                if (rowCount == nElements) {
                    // Last batch may be padded beyond the real element count.
                    break;
                }
                assertEquals(getIntValue(isDictionaryEncoding, rowCount), aField.vector[idx]);
                assertEquals(getDoubleValue(isDictionaryEncoding, rowCount), bField.vector[idx], 0);
                assertFalse(structVector.isNull[idx]);
                assertFalse(structVector.isRepeating);
                rowCount++;
            }
        }
        assertEquals("It doesn't exit at expected position", nElements, rowCount);
    } finally {
        reader.close();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) Configuration(org.apache.hadoop.conf.Configuration) VectorizedParquetRecordReader(org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader) StructColumnVector(org.apache.hadoop.hive.ql.exec.vector.StructColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 97 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

The following example is taken from the class VectorizedColumnReaderTestBase, method structReadSomeNull.

/**
 * Reads a Parquet struct&lt;f:int,g:double&gt; column containing nulls in a known pattern
 * (f null every 2nd row, g null every 3rd row, the whole struct null every 6th row)
 * and verifies both the null flags and the non-null values.
 *
 * @param isDictionaryEncoding whether the test data was written dictionary-encoded
 * @throws Exception on reader setup or I/O failure
 */
protected void structReadSomeNull(boolean isDictionaryEncoding) throws Exception {
    Configuration conf = new Configuration();
    conf.set(IOConstants.COLUMNS, "struct_field_some_null");
    conf.set(IOConstants.COLUMNS_TYPES, "struct<f:int,g:double>");
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
    // BUG FIX: the schema was missing the closing brace of the outer message
    // group (compare structRead), producing a malformed Parquet schema string.
    String schema = "message hive_schema {\n"
        + "group struct_field_some_null {\n"
        + "  optional int32 f;\n"
        + "  optional double g;\n"
        + "}\n"
        + "}\n";
    VectorizedParquetRecordReader reader = createTestParquetReader(schema, conf);
    VectorizedRowBatch previous = reader.createValue();
    int c = 0;
    try {
        while (reader.next(NullWritable.get(), previous)) {
            StructColumnVector sv = (StructColumnVector) previous.cols[0];
            LongColumnVector fv = (LongColumnVector) sv.fields[0];
            DoubleColumnVector gv = (DoubleColumnVector) sv.fields[1];
            for (int i = 0; i < fv.vector.length; i++) {
                if (c == nElements) {
                    // Last batch may be padded beyond the real element count.
                    break;
                }
                assertEquals(c % 2 == 0, fv.isNull[i]);
                assertEquals(c % 3 == 0, gv.isNull[i]);
                // Struct itself is null only where both children are null: lcm(2,3) = 6.
                assertEquals(c % /* 2*3 = */
                6 == 0, sv.isNull[i]);
                if (!sv.isNull[i]) {
                    if (!fv.isNull[i]) {
                        assertEquals(getIntValue(isDictionaryEncoding, c), fv.vector[i]);
                    }
                    if (!gv.isNull[i]) {
                        assertEquals(getDoubleValue(isDictionaryEncoding, c), gv.vector[i], 0);
                    }
                }
                assertFalse(fv.isRepeating);
                c++;
            }
        }
        assertEquals("It doesn't exit at expected position", nElements, c);
    } finally {
        reader.close();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) Configuration(org.apache.hadoop.conf.Configuration) VectorizedParquetRecordReader(org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader) StructColumnVector(org.apache.hadoop.hive.ql.exec.vector.StructColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 98 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

The following example is taken from the class VectorizedColumnReaderTestBase, method floatReadInt.

/**
 * Reads an int32 Parquet column with a Hive type of float, exercising the
 * int-to-floating-point conversion path of the vectorized reader, and checks
 * every value plus the no-null flags.
 *
 * @param isDictionaryEncoding whether the test data was written dictionary-encoded
 * @throws InterruptedException if the reader is interrupted
 * @throws HiveException on Hive-side reader failure
 * @throws IOException on I/O failure
 */
protected void floatReadInt(boolean isDictionaryEncoding) throws InterruptedException, HiveException, IOException {
    conf.set(IOConstants.COLUMNS, "int32_field");
    conf.set(IOConstants.COLUMNS_TYPES, "float");
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
    VectorizedParquetRecordReader reader =
        createTestParquetReader("message test { required int32" + " int32_field;}", conf);
    VectorizedRowBatch batch = reader.createValue();
    try {
        int rowsChecked = 0;
        while (reader.next(NullWritable.get(), batch)) {
            // Floats are surfaced through DoubleColumnVector in Hive's vectorized model.
            DoubleColumnVector col = (DoubleColumnVector) batch.cols[0];
            assertTrue(col.noNulls);
            for (int idx = 0; idx < col.vector.length; idx++) {
                if (rowsChecked == nElements) {
                    // Last batch may be padded beyond the real element count.
                    break;
                }
                assertEquals("Failed at " + rowsChecked,
                    getIntValue(isDictionaryEncoding, rowsChecked), col.vector[idx], 0);
                assertFalse(col.isNull[idx]);
                rowsChecked++;
            }
        }
        assertEquals(nElements, rowsChecked);
    } finally {
        reader.close();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) VectorizedParquetRecordReader(org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)

Example 99 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

The following example is taken from the class VectorBatchGenerator, method assignColumnVectors.

/**
 * Allocates and installs a column vector of the right concrete type for the given
 * batch column, based on the first generate type of the column group.
 *
 * @param batch the row batch whose column slot is populated
 * @param columnNum the index in {@code batch.cols} to assign
 * @param columnGroup supplies the generate types; only the first is consulted
 * @throws RuntimeException for categories not yet supported by the generator
 */
public void assignColumnVectors(VectorizedRowBatch batch, int columnNum, VectorColumnGroupGenerator columnGroup) {
    // UNDONE: Multiple types...
    GenerateType[] generateTypes = columnGroup.generateTypes();
    GenerateType generateType = generateTypes[0];
    ColumnVector colVector;
    switch(generateType.getCategory()) {
        // All integral categories (and boolean) share LongColumnVector.
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
            colVector = new LongColumnVector();
            break;
        // Both floating-point categories share DoubleColumnVector.
        case FLOAT:
        case DOUBLE:
            colVector = new DoubleColumnVector();
            break;
        case STRING:
            colVector = new BytesColumnVector();
            break;
        // UNDONE
        case DATE:
        case TIMESTAMP:
        case BINARY:
        case DECIMAL:
        case VARCHAR:
        case CHAR:
        case LIST:
        case MAP:
        case STRUCT:
        case UNION:
        default:
            // BUG FIX: corrected misspelled "catagory" in the error message.
            throw new RuntimeException("Unsupported category " + generateType.getCategory());
    }
    colVector.init();
    batch.cols[columnNum] = colVector;
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 100 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

The following example is taken from the class VectorColumnGroupGenerator, method populateBatchColumn.

/**
 * Copies {@code size} pre-generated values for one logical column from this
 * generator's staging array into the matching column vector of the batch.
 *
 * <p>The staging array's element type and the vector's concrete class are both
 * determined by the column's {@link GenerateCategory}: integral categories and
 * BOOLEAN go into a {@code LongColumnVector}, FLOAT/DOUBLE into a
 * {@code DoubleColumnVector}, STRING into a {@code BytesColumnVector}, and
 * TIMESTAMP into a {@code TimestampColumnVector}.
 *
 * @param batch the row batch receiving the values
 * @param logicalColumnIndex index into this generator's parallel
 *     {@code columnNums} / {@code generateTypes} / {@code arrays} arrays
 * @param size number of leading elements of the staging array to copy
 */
private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnIndex, int size) {
    int columnNum = columnNums[logicalColumnIndex];
    ColumnVector colVector = batch.cols[columnNum];
    GenerateType generateType = generateTypes[logicalColumnIndex];
    GenerateCategory category = generateType.getCategory();
    // Staging array; its runtime element type must match the category's case below.
    Object array = arrays[logicalColumnIndex];
    switch(category) {
        case BOOLEAN:
            {
                boolean[] booleanArray = ((boolean[]) array);
                long[] vector = ((LongColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    // Booleans are encoded as 0/1 in the long vector.
                    vector[i] = (booleanArray[i] ? 1 : 0);
                }
            }
            break;
        case BYTE:
            {
                byte[] byteArray = ((byte[]) array);
                long[] vector = ((LongColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    vector[i] = byteArray[i];
                }
            }
            break;
        case SHORT:
            {
                short[] shortArray = ((short[]) array);
                long[] vector = ((LongColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    vector[i] = shortArray[i];
                }
            }
            break;
        case INT:
            {
                int[] intArray = ((int[]) array);
                long[] vector = ((LongColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    vector[i] = intArray[i];
                }
            }
            break;
        case LONG:
            {
                long[] longArray = ((long[]) array);
                long[] vector = ((LongColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    vector[i] = longArray[i];
                }
            }
            break;
        case FLOAT:
            {
                float[] floatArray = ((float[]) array);
                // Floats widen losslessly into the double vector.
                double[] vector = ((DoubleColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    vector[i] = floatArray[i];
                }
            }
            break;
        case DOUBLE:
            {
                double[] doubleArray = ((double[]) array);
                double[] vector = ((DoubleColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    vector[i] = doubleArray[i];
                }
            }
            break;
        case STRING:
            {
                String[] stringArray = ((String[]) array);
                BytesColumnVector bytesColVec = ((BytesColumnVector) colVector);
                for (int i = 0; i < size; i++) {
                    // NOTE(review): getBytes() uses the platform default charset here
                    // — presumably fine for generated test data, but not charset-safe.
                    byte[] bytes = stringArray[i].getBytes();
                    bytesColVec.setVal(i, bytes);
                }
            }
            break;
        case TIMESTAMP:
            {
                Timestamp[] timestampArray = ((Timestamp[]) array);
                TimestampColumnVector timestampColVec = ((TimestampColumnVector) colVector);
                for (int i = 0; i < size; i++) {
                    Timestamp timestamp = timestampArray[i];
                    timestampColVec.set(i, timestamp);
                }
            }
            break;
        // Unsupported categories are deliberately a silent no-op; the vector
        // allocation path rejects them before this method is reached.
        case DATE:
        case BINARY:
        case DECIMAL:
        case VARCHAR:
        case CHAR:
        case LIST:
        case MAP:
        case STRUCT:
        case UNION:
        default:
    }
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) GenerateType(org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType) GenerateCategory(org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Timestamp(java.sql.Timestamp) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Aggregations

DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)104 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)59 Test (org.junit.Test)37 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)33 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)18 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)17 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)13 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)10 VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)9 Configuration (org.apache.hadoop.conf.Configuration)7 Random (java.util.Random)5 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)5 Timestamp (java.sql.Timestamp)4 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)4 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)4 StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector)3 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)2 IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector)2 ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2