Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
From the class VectorizedColumnReaderTestBase, method binaryRead:
protected void binaryRead(boolean isDictionaryEncoding) throws Exception {
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "binary_field_some_null");
  conf.set(IOConstants.COLUMNS_TYPES, "string");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  VectorizedParquetRecordReader reader =
      createParquetReader("message test { required binary binary_field_some_null;}", conf);
  VectorizedRowBatch previous = reader.createValue();
  int c = 0;
  try {
    while (reader.next(NullWritable.get(), previous)) {
      BytesColumnVector vector = (BytesColumnVector) previous.cols[0];
      boolean noNull = true;
      for (int i = 0; i < vector.vector.length; i++) {
        if (c == nElements) {
          break;
        }
        String actual;
        assertEquals("Null assert failed at " + c, isNull(c), vector.isNull[i]);
        if (!vector.isNull[i]) {
          actual = new String(ArrayUtils.subarray(vector.vector[i], vector.start[i],
              vector.start[i] + vector.length[i]));
          assertEquals("failed at " + c, getStr(isDictionaryEncoding, c), actual);
        } else {
          noNull = false;
        }
        c++;
      }
      assertEquals("No Null check failed at " + c, noNull, vector.noNulls);
      assertFalse(vector.isRepeating);
    }
    assertEquals("It doesn't exit at expected position", nElements, c);
  } finally {
    reader.close();
  }
}
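In the Hive tree, helpers like this are driven by concrete subclasses of VectorizedColumnReaderTestBase, one run per encoding of the test file. A minimal driver sketch, assuming JUnit 4 and the file-writing setup the base class provides (class and method names here are illustrative, not from the snippet):

import org.junit.Test;

public class TestBinaryColumnRead extends VectorizedColumnReaderTestBase {

  @Test
  public void testBinaryReadPlain() throws Exception {
    // Validate against the test file written without dictionary encoding.
    binaryRead(false);
  }

  @Test
  public void testBinaryReadDictionaryEncoded() throws Exception {
    // Validate against the test file written with dictionary encoding.
    binaryRead(true);
  }
}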
Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
From the class VectorizedColumnReaderTestBase, method decimalRead:
protected void decimalRead(boolean isDictionaryEncoding) throws Exception {
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "value");
  conf.set(IOConstants.COLUMNS_TYPES, "decimal(5,2)");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  // The extracted snippet dropped the primitive type from the schema string;
  // a DECIMAL(5,2) annotation must sit on a concrete Parquet primitive.
  // int32 is assumed here; any carrier wide enough for precision 5 would do.
  VectorizedParquetRecordReader reader =
      createParquetReader("message hive_schema { required int32 value (DECIMAL(5,2));}", conf);
  VectorizedRowBatch previous = reader.createValue();
  try {
    int c = 0;
    while (reader.next(NullWritable.get(), previous)) {
      DecimalColumnVector vector = (DecimalColumnVector) previous.cols[0];
      assertTrue(vector.noNulls);
      for (int i = 0; i < vector.vector.length; i++) {
        if (c == nElements) {
          break;
        }
        assertEquals("Check failed at pos " + c, getDecimal(isDictionaryEncoding, c),
            vector.vector[i].getHiveDecimal());
        assertFalse(vector.isNull[i]);
        c++;
      }
    }
    assertEquals(nElements, c);
  } finally {
    reader.close();
  }
}
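Since the schema literal above had to be repaired, a quick standalone parse check is worth having. This sketch assumes parquet-column is on the classpath; the int32 carrier type is an assumption (binary or fixed_len_byte_array(n) would also be legal for precision 5):

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class DecimalSchemaCheck {
  public static void main(String[] args) {
    // DECIMAL with precision <= 9 fits in an int32 carrier.
    MessageType t = MessageTypeParser.parseMessageType(
        "message hive_schema { required int32 value (DECIMAL(5,2)); }");
    System.out.println(t);
  }
}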
Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
From the class VectorizedColumnReaderTestBase, method nestedStructRead0:
protected void nestedStructRead0(boolean isDictionaryEncoding) throws Exception {
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "nested_struct_field");
  conf.set(IOConstants.COLUMNS_TYPES, "struct<nsf:struct<c:int,d:int>,e:double>");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  // Repaired schema literal: the extracted snippet was missing the repetition
  // keyword on nested_struct_field and the closing brace of the message.
  String schema = "message hive_schema {\n"
      + "  optional group nested_struct_field {\n"
      + "    optional group nsf {\n"
      + "      optional int32 c;\n"
      + "      optional int32 d;\n"
      + "    }\n"
      + "    optional double e;\n"
      + "  }\n"
      + "}";
  VectorizedParquetRecordReader reader = createParquetReader(schema, conf);
  VectorizedRowBatch previous = reader.createValue();
  int c = 0;
  try {
    while (reader.next(NullWritable.get(), previous)) {
      StructColumnVector vector = (StructColumnVector) previous.cols[0];
      StructColumnVector sv = (StructColumnVector) vector.fields[0];
      LongColumnVector cv = (LongColumnVector) sv.fields[0];
      LongColumnVector dv = (LongColumnVector) sv.fields[1];
      DoubleColumnVector ev = (DoubleColumnVector) vector.fields[1];
      for (int i = 0; i < cv.vector.length; i++) {
        if (c == nElements) {
          break;
        }
        assertEquals(getIntValue(isDictionaryEncoding, c), cv.vector[i]);
        assertEquals(getIntValue(isDictionaryEncoding, c), dv.vector[i]);
        assertEquals(getDoubleValue(isDictionaryEncoding, c), ev.vector[i], 0);
        assertFalse(vector.isNull[i]);
        assertFalse(vector.isRepeating);
        c++;
      }
    }
    assertEquals("It doesn't exit at expected position", nElements, c);
  } finally {
    reader.close();
  }
}
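The same kind of standalone parse check applies to the repaired nested schema; running it confirms that every non-root field carries a repetition keyword and that the braces balance (sketch, assuming parquet-column on the classpath):

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class NestedSchemaCheck {
  public static void main(String[] args) {
    MessageType t = MessageTypeParser.parseMessageType(
        "message hive_schema {\n"
        + "  optional group nested_struct_field {\n"
        + "    optional group nsf {\n"
        + "      optional int32 c;\n"
        + "      optional int32 d;\n"
        + "    }\n"
        + "    optional double e;\n"
        + "  }\n"
        + "}");
    System.out.println(t);
  }
}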
Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
From the class VectorizedColumnReaderTestBase, method floatRead:
protected void floatRead(boolean isDictionaryEncoding) throws Exception {
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "float_field");
  conf.set(IOConstants.COLUMNS_TYPES, "float");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  VectorizedParquetRecordReader reader =
      createParquetReader("message test { required float float_field;}", conf);
  VectorizedRowBatch previous = reader.createValue();
  try {
    int c = 0;
    while (reader.next(NullWritable.get(), previous)) {
      DoubleColumnVector vector = (DoubleColumnVector) previous.cols[0];
      assertTrue(vector.noNulls);
      for (int i = 0; i < vector.vector.length; i++) {
        if (c == nElements) {
          break;
        }
        assertEquals("Failed at " + c, getFloatValue(isDictionaryEncoding, c), vector.vector[i], 0);
        assertFalse(vector.isNull[i]);
        c++;
      }
    }
    assertEquals(nElements, c);
  } finally {
    reader.close();
  }
}
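Note that Hive's vectorized layer has no float-specific vector: a Parquet float column surfaces through DoubleColumnVector, just as int32 surfaces through LongColumnVector in the next snippet. A minimal consumption sketch reusing the same helper and test file as above (assumption: this runs inside a subclass of VectorizedColumnReaderTestBase, so createParquetReader is in scope):

// Sketch: print the widened float values from the first batch.
Configuration conf = new Configuration();
conf.set(IOConstants.COLUMNS, "float_field");
conf.set(IOConstants.COLUMNS_TYPES, "float");
VectorizedParquetRecordReader reader =
    createParquetReader("message test { required float float_field;}", conf);
VectorizedRowBatch batch = reader.createValue();
try {
  if (reader.next(NullWritable.get(), batch)) {
    DoubleColumnVector v = (DoubleColumnVector) batch.cols[0];
    // batch.size, not v.vector.length, bounds the valid rows in a batch; the
    // test loops above instead rely on the c == nElements guard at the end.
    for (int i = 0; i < batch.size; i++) {
      System.out.println(v.vector[i]);
    }
  }
} finally {
  reader.close();
}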
Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
From the class VectorizedColumnReaderTestBase, method intRead:
protected void intRead(boolean isDictionaryEncoding) throws InterruptedException, HiveException, IOException {
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "int32_field");
  conf.set(IOConstants.COLUMNS_TYPES, "int");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  VectorizedParquetRecordReader reader =
      createParquetReader("message test { required int32 int32_field;}", conf);
  VectorizedRowBatch previous = reader.createValue();
  try {
    int c = 0;
    while (reader.next(NullWritable.get(), previous)) {
      LongColumnVector vector = (LongColumnVector) previous.cols[0];
      assertTrue(vector.noNulls);
      for (int i = 0; i < vector.vector.length; i++) {
        if (c == nElements) {
          break;
        }
        assertEquals("Failed at " + c, getIntValue(isDictionaryEncoding, c), vector.vector[i]);
        assertFalse(vector.isNull[i]);
        c++;
      }
    }
    assertEquals(nElements, c);
  } finally {
    reader.close();
  }
}
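A dictionary-encoding-focused subclass would exercise the same helper with the flag flipped; a sketch, again assuming JUnit 4 (names illustrative, not from the snippet):

import org.junit.Test;

public class TestDictionaryEncodedColumnReads extends VectorizedColumnReaderTestBase {

  @Test
  public void testIntReadDictionaryEncoded() throws Exception {
    // intRead(true) checks the file written with dictionary encoding enabled;
    // getIntValue(true, c) presumably accounts for the repeated values.
    intRead(true);
  }
}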