Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
From class VectorizedColumnReaderTestBase, method timestampRead.
protected void timestampRead(boolean isDictionaryEncoding) throws InterruptedException, HiveException, IOException {
  conf.set(IOConstants.COLUMNS, "int96_field");
  conf.set(IOConstants.COLUMNS_TYPES, "timestamp");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  VectorizedParquetRecordReader reader =
      createTestParquetReader("message test { required int96 int96_field;}", conf);
  VectorizedRowBatch previous = reader.createValue();
  try {
    int c = 0;
    while (reader.next(NullWritable.get(), previous)) {
      TimestampColumnVector vector = (TimestampColumnVector) previous.cols[0];
      assertTrue(vector.noNulls);
      for (int i = 0; i < vector.nanos.length; i++) {
        if (c == nElements) {
          break;
        }
        // Dictionary-encoded files cycle through UNIQUE_NUM distinct values; plain files are sequential.
        Timestamp expected = isDictionaryEncoding ? new Timestamp(c % UNIQUE_NUM) : new Timestamp(c);
        assertEquals("Not the same time at " + c, expected.getTime(), vector.getTime(i));
        assertEquals("Not the same nano at " + c, expected.getNanos(), vector.getNanos(i));
        assertFalse(vector.isNull[i]);
        c++;
      }
    }
    assertEquals(nElements, c);
  } finally {
    reader.close();
  }
}
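These protected helpers are driven from a concrete JUnit subclass. A minimal sketch of such a caller, exercising both encodings (the class and test names below are illustrative, not taken from this page):

// Hypothetical subclass; org.junit.Test is assumed on the classpath.
public class MyVectorizedReaderTest extends VectorizedColumnReaderTestBase {
  @Test
  public void testTimestampRead() throws Exception {
    timestampRead(true);   // file written with dictionary encoding
    timestampRead(false);  // file written with plain encoding
  }
}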
Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
From class VectorizedColumnReaderTestBase, method decimalRead.
protected void decimalRead(boolean isDictionaryEncoding) throws Exception {
  Configuration readerConf = new Configuration();
  readerConf.set(IOConstants.COLUMNS, "value");
  readerConf.set(IOConstants.COLUMNS_TYPES, "decimal(5,2)");
  readerConf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  readerConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  // DECIMAL(5,2) is a logical type annotation on the binary primitive column.
  VectorizedParquetRecordReader reader =
      createTestParquetReader("message hive_schema { required binary value (DECIMAL(5,2));}", readerConf);
  VectorizedRowBatch previous = reader.createValue();
  try {
    int c = 0;
    while (reader.next(NullWritable.get(), previous)) {
      DecimalColumnVector vector = (DecimalColumnVector) previous.cols[0];
      assertTrue(vector.noNulls);
      for (int i = 0; i < vector.vector.length; i++) {
        if (c == nElements) {
          break;
        }
        assertEquals("Check failed at pos " + c, getDecimal(isDictionaryEncoding, c), vector.vector[i].getHiveDecimal());
        assertFalse(vector.isNull[i]);
        c++;
      }
    }
    assertEquals(nElements, c);
  } finally {
    reader.close();
  }
}
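The DECIMAL(5,2) annotation must sit on a primitive column for the schema string to parse at all. A quick sanity check using Parquet's MessageTypeParser (a sketch, assuming org.apache.parquet.schema is on the classpath):

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

// Succeeds because DECIMAL(5,2) annotates the binary primitive; a field
// declared without a primitive type would throw at parse time.
MessageType schema = MessageTypeParser.parseMessageType(
    "message hive_schema { required binary value (DECIMAL(5,2)); }");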
Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
From class VectorizedColumnReaderTestBase, method stringReadDouble.
protected void stringReadDouble(boolean isDictionaryEncoding) throws Exception {
  Configuration readerConf = new Configuration();
  readerConf.set(IOConstants.COLUMNS, "double_field");
  readerConf.set(IOConstants.COLUMNS_TYPES, "string");
  readerConf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  readerConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  VectorizedParquetRecordReader reader =
      createTestParquetReader("message test { required double double_field;}", readerConf);
  VectorizedRowBatch previous = reader.createValue();
  try {
    int c = 0;
    while (reader.next(NullWritable.get(), previous)) {
      // The file stores doubles, but the Hive read type is string, so the
      // reader converts each value to its string form in a BytesColumnVector.
      BytesColumnVector vector = (BytesColumnVector) previous.cols[0];
      assertTrue(vector.noNulls);
      for (int i = 0; i < vector.vector.length; i++) {
        if (c == nElements) {
          break;
        }
        String actual = new String(Arrays.copyOfRange(vector.vector[i], vector.start[i], vector.start[i] + vector.length[i]));
        assertEquals("Failed at " + c, String.valueOf(getDoubleValue(isDictionaryEncoding, c)), actual);
        assertFalse(vector.isNull[i]);
        c++;
      }
    }
    assertEquals(nElements, c);
  } finally {
    reader.close();
  }
}
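The String construction above relies on the platform default charset. Since stringified doubles are plain ASCII, an equivalent charset-explicit form (a minor variant, not from the original test) is:

String actual = new String(vector.vector[i], vector.start[i], vector.length[i],
    java.nio.charset.StandardCharsets.UTF_8);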
Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
From class VectorizedColumnReaderTestBase, method doubleReadInt.
protected void doubleReadInt(boolean isDictionaryEncoding) throws InterruptedException, HiveException, IOException {
  conf.set(IOConstants.COLUMNS, "int32_field");
  conf.set(IOConstants.COLUMNS_TYPES, "double");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  VectorizedParquetRecordReader reader =
      createTestParquetReader("message test { required int32 int32_field;}", conf);
  VectorizedRowBatch previous = reader.createValue();
  try {
    int c = 0;
    while (reader.next(NullWritable.get(), previous)) {
      DoubleColumnVector vector = (DoubleColumnVector) previous.cols[0];
      assertTrue(vector.noNulls);
      for (int i = 0; i < vector.vector.length; i++) {
        if (c == nElements) {
          break;
        }
        assertEquals("Failed at " + c, getIntValue(isDictionaryEncoding, c), vector.vector[i], 0);
        assertFalse(vector.isNull[i]);
        c++;
      }
    }
    assertEquals(nElements, c);
  } finally {
    reader.close();
  }
}
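All four read helpers set up the same single-column projection. A small hypothetical helper (names are illustrative; the original class does not define it) could factor that configuration out:

// Hypothetical refactoring of the repeated projection setup.
private static void projectSingleColumn(Configuration conf, String name, String hiveType) {
  conf.set(IOConstants.COLUMNS, name);
  conf.set(IOConstants.COLUMNS_TYPES, hiveType);
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
}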
Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
From class VectorizedColumnReaderTestBase, method createTestParquetReader.
protected VectorizedParquetRecordReader createTestParquetReader(String schemaString, Configuration conf) throws IOException, InterruptedException, HiveException {
  conf.set(PARQUET_READ_SCHEMA, schemaString);
  HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
  HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "//tmp");
  // Build a one-split job over the test file and hand its split to the vectorized reader.
  Job vectorJob = new Job(conf, "read vector");
  ParquetInputFormat.setInputPaths(vectorJob, file);
  ParquetInputFormat parquetInputFormat = new ParquetInputFormat(GroupReadSupport.class);
  ParquetInputSplit split = (ParquetInputSplit) parquetInputFormat.getSplits(vectorJob).get(0);
  initialVectorizedRowBatchCtx(conf);
  return new VectorizedParquetRecordReader(split, new JobConf(conf));
}
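A sketch of how a caller consumes the reader this helper returns, assuming conf projects int32_field with Hive type int rather than the double conversion shown earlier (Hive vectorization stores all integer types in a LongColumnVector):

VectorizedParquetRecordReader reader =
    createTestParquetReader("message test { required int32 int32_field;}", conf);
VectorizedRowBatch batch = reader.createValue();
try {
  while (reader.next(NullWritable.get(), batch)) {
    LongColumnVector col = (LongColumnVector) batch.cols[0];
    // batch.size rows are populated in col.vector
  }
} finally {
  reader.close();
}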