Use of org.apache.hadoop.hive.ql.exec.vector.StructColumnVector in project hive by apache.
From the class VectorizedColumnReaderTestBase, method nestedStructRead0.
protected void nestedStructRead0(boolean isDictionaryEncoding) throws Exception {
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "nested_struct_field");
  conf.set(IOConstants.COLUMNS_TYPES, "struct<nsf:struct<c:int,d:int>,e:double>");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  // The schema string as shown was missing the newline after the nsf group and
  // the closing brace of the message; both are restored here.
  String schema = "message hive_schema {\n"
      + "group nested_struct_field {\n"
      + "  optional group nsf {\n"
      + "    optional int32 c;\n"
      + "    optional int32 d;\n"
      + "  }\n"
      + "  optional double e;\n"
      + "}\n"
      + "}";
  VectorizedParquetRecordReader reader = createParquetReader(schema, conf);
  VectorizedRowBatch previous = reader.createValue();
  int c = 0;
  try {
    while (reader.next(NullWritable.get(), previous)) {
      StructColumnVector vector = (StructColumnVector) previous.cols[0];
      StructColumnVector sv = (StructColumnVector) vector.fields[0];
      LongColumnVector cv = (LongColumnVector) sv.fields[0];
      LongColumnVector dv = (LongColumnVector) sv.fields[1];
      DoubleColumnVector ev = (DoubleColumnVector) vector.fields[1];
      for (int i = 0; i < cv.vector.length; i++) {
        if (c == nElements) {
          break;
        }
        assertEquals(getIntValue(isDictionaryEncoding, c), cv.vector[i]);
        assertEquals(getIntValue(isDictionaryEncoding, c), dv.vector[i]);
        assertEquals(getDoubleValue(isDictionaryEncoding, c), ev.vector[i], 0);
        assertFalse(vector.isNull[i]);
        assertFalse(vector.isRepeating);
        c++;
      }
    }
    assertEquals("It doesn't exit at expected position", nElements, c);
  } finally {
    reader.close();
  }
}
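The casts in this test rely on StructColumnVector being nothing more than a positional array of child ColumnVectors. Here is a minimal standalone sketch of that layout, not taken from the Hive sources; the class name, sizes, and values are illustrative, and it assumes the StructColumnVector(int, ColumnVector...) constructor from hive-storage-api:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;

public class StructVectorSketch {
  public static void main(String[] args) {
    // struct<c:int,d:int>: the struct vector only holds references to its children.
    LongColumnVector c = new LongColumnVector(1024);
    LongColumnVector d = new LongColumnVector(1024);
    StructColumnVector struct = new StructColumnVector(1024, c, d);
    for (int r = 0; r < 3; r++) {
      c.vector[r] = r;       // int columns are carried in LongColumnVector
      d.vector[r] = r * 2;
    }
    // fields[] is positional and matches the declared field order, which is
    // exactly why the test above can cast sv.fields[0] and sv.fields[1] directly.
    for (int r = 0; r < 3; r++) {
      System.out.println("c=" + ((LongColumnVector) struct.fields[0]).vector[r]
          + ", d=" + ((LongColumnVector) struct.fields[1]).vector[r]);
    }
  }
}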
Use of org.apache.hadoop.hive.ql.exec.vector.StructColumnVector in project hive by apache.
From the class BatchToRowReader, method nextStruct.
public StructType nextStruct(ColumnVector vector, int row, StructTypeInfo schema, Object previous) {
  if (vector.isRepeating) {
    row = 0;
  }
  if (vector.noNulls || !vector.isNull[row]) {
    List<TypeInfo> childrenTypes = schema.getAllStructFieldTypeInfos();
    StructType result = createStructObject(previous, childrenTypes);
    StructColumnVector struct = (StructColumnVector) vector;
    for (int f = 0; f < childrenTypes.size(); ++f) {
      setStructCol(result, f,
          nextValue(struct.fields[f], row, childrenTypes.get(f), getStructCol(result, f)));
    }
    return result;
  } else {
    return null;
  }
}
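The two checks at the top of nextStruct are the standard Hive vector convention: a repeating vector stores its single value in slot 0, and isNull[] is only meaningful when noNulls is false. A minimal sketch of the same convention, specialized to LongColumnVector so it runs standalone (the class name and test values are mine):

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class NullConventionSketch {
  // Same two checks as nextStruct above, returning null for a null cell.
  static Long readLong(LongColumnVector v, int row) {
    if (v.isRepeating) {
      row = 0;                            // every row shares slot 0
    }
    if (v.noNulls || !v.isNull[row]) {
      return v.vector[row];
    }
    return null;
  }

  public static void main(String[] args) {
    LongColumnVector v = new LongColumnVector(4);
    v.isRepeating = true;
    v.vector[0] = 7;
    System.out.println(readLong(v, 3));   // 7: repeating maps row 3 to slot 0
    v.isRepeating = false;
    v.noNulls = false;
    v.isNull[3] = true;
    System.out.println(readLong(v, 3));   // null
  }
}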
Use of org.apache.hadoop.hive.ql.exec.vector.StructColumnVector in project hive by apache.
From the class VectorizedStructColumnReader, method readBatch.
@Override
public void readBatch(int total, ColumnVector column, TypeInfo columnType) throws IOException {
  StructColumnVector structColumnVector = (StructColumnVector) column;
  StructTypeInfo structTypeInfo = (StructTypeInfo) columnType;
  ColumnVector[] vectors = structColumnVector.fields;
  for (int i = 0; i < vectors.length; i++) {
    fieldReaders.get(i).readBatch(total, vectors[i], structTypeInfo.getAllStructFieldTypeInfos().get(i));
    structColumnVector.isRepeating = structColumnVector.isRepeating && vectors[i].isRepeating;
    for (int j = 0; j < vectors[i].isNull.length; j++) {
      structColumnVector.isNull[j] = (i == 0) ? vectors[i].isNull[j]
          : structColumnVector.isNull[j] && vectors[i].isNull[j];
    }
    structColumnVector.noNulls = (i == 0) ? vectors[i].noNulls : structColumnVector.noNulls && vectors[i].noNulls;
  }
}
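The loop above derives the struct's flags from its children: the struct is repeating only if every child repeats, a struct slot is null only if every child is null there, and the struct has no nulls only if no child has any. A hedged restatement of that merging rule outside the reader; the class name is mine, and seeding the AND-fold with all-true replaces the (i == 0) special case:

import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;

public class MergeFlagsSketch {
  // Start from the identity of AND (all true) and fold each child in,
  // which is equivalent to readBatch's (i == 0) seeding above.
  static void mergeFlags(StructColumnVector struct) {
    struct.noNulls = true;
    Arrays.fill(struct.isNull, true);
    boolean repeating = true;
    for (ColumnVector child : struct.fields) {
      repeating = repeating && child.isRepeating;
      struct.noNulls = struct.noNulls && child.noNulls;
      int n = Math.min(struct.isNull.length, child.isNull.length);
      for (int j = 0; j < n; j++) {
        struct.isNull[j] = struct.isNull[j] && child.isNull[j];
      }
    }
    struct.isRepeating = repeating;
  }
}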
Use of org.apache.hadoop.hive.ql.exec.vector.StructColumnVector in project hive by apache.
From the class RecordReaderImpl, method nextStruct.
static OrcStruct nextStruct(ColumnVector vector, int row, TypeDescription schema, Object previous) {
  if (vector.isRepeating) {
    row = 0;
  }
  if (vector.noNulls || !vector.isNull[row]) {
    OrcStruct result;
    List<TypeDescription> childrenTypes = schema.getChildren();
    int numChildren = childrenTypes.size();
    if (previous == null || previous.getClass() != OrcStruct.class) {
      result = new OrcStruct(numChildren);
    } else {
      result = (OrcStruct) previous;
      result.setNumFields(numChildren);
    }
    StructColumnVector struct = (StructColumnVector) vector;
    for (int f = 0; f < numChildren; ++f) {
      result.setFieldValue(f, nextValue(struct.fields[f], row, childrenTypes.get(f), result.getFieldValue(f)));
    }
    return result;
  } else {
    return null;
  }
}
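Note that both nextStruct implementations recycle the previous row object when its class matches, avoiding a per-row allocation in the read loop. A standalone sketch of that reuse-or-allocate step, with java.util.ArrayList standing in for OrcStruct so the sketch carries no ORC dependency (the substitution and the method name are mine):

import java.util.ArrayList;
import java.util.Collections;

public class ReuseSketch {
  // Keep the previous row object when it is exactly the expected class,
  // otherwise allocate a fresh one; mirrors the branch in nextStruct above.
  @SuppressWarnings("unchecked")
  static ArrayList<Object> structFor(Object previous, int numChildren) {
    ArrayList<Object> result;
    if (previous == null || previous.getClass() != ArrayList.class) {
      result = new ArrayList<>(Collections.nCopies(numChildren, null));
    } else {
      result = (ArrayList<Object>) previous;   // recycled across rows
      result.clear();
      result.addAll(Collections.nCopies(numChildren, null));
    }
    return result;
  }
}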
Use of org.apache.hadoop.hive.ql.exec.vector.StructColumnVector in project hive by apache.
From the class TestInputOutputFormat, method testSchemaEvolution.
/**
 * Test schema evolution when using the reader directly.
 */
@Test
public void testSchemaEvolution() throws Exception {
  TypeDescription fileSchema =
      TypeDescription.fromString("struct<a:int,b:struct<c:int>,d:string>");
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf).fileSystem(fs).setSchema(fileSchema)
          .compress(org.apache.orc.CompressionKind.NONE));
  VectorizedRowBatch batch = fileSchema.createRowBatch(1000);
  batch.size = 1000;
  LongColumnVector lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
  for (int r = 0; r < 1000; r++) {
    ((LongColumnVector) batch.cols[0]).vector[r] = r * 42;
    lcv.vector[r] = r * 10001;
    ((BytesColumnVector) batch.cols[2]).setVal(r, Integer.toHexString(r).getBytes(StandardCharsets.UTF_8));
  }
  writer.addRowBatch(batch);
  writer.close();
  TypeDescription readerSchema = TypeDescription.fromString(
      "struct<a:int,b:struct<c:int,future1:int>,d:string,future2:int>");
  Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rowsOptions(new Reader.Options().schema(readerSchema));
  batch = readerSchema.createRowBatch();
  lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
  LongColumnVector future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
  assertEquals(true, rows.nextBatch(batch));
  assertEquals(1000, batch.size);
  assertEquals(true, future1.isRepeating);
  assertEquals(true, future1.isNull[0]);
  assertEquals(true, batch.cols[3].isRepeating);
  assertEquals(true, batch.cols[3].isNull[0]);
  for (int r = 0; r < batch.size; ++r) {
    assertEquals("row " + r, r * 42, ((LongColumnVector) batch.cols[0]).vector[r]);
    assertEquals("row " + r, r * 10001, lcv.vector[r]);
assertEquals("row " + r, r * 10001, lcv.vector[r]);
assertEquals("row " + r, Integer.toHexString(r), ((BytesColumnVector) batch.cols[2]).toString(r));
}
assertEquals(false, rows.nextBatch(batch));
rows.close();
// try it again with an include vector
rows = reader.rowsOptions(new Reader.Options().schema(readerSchema).include(new boolean[] { false, true, true, true, false, false, true }));
batch = readerSchema.createRowBatch();
lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
assertEquals(true, rows.nextBatch(batch));
assertEquals(1000, batch.size);
assertEquals(true, future1.isRepeating);
assertEquals(true, future1.isNull[0]);
assertEquals(true, batch.cols[3].isRepeating);
assertEquals(true, batch.cols[3].isNull[0]);
assertEquals(true, batch.cols[2].isRepeating);
assertEquals(true, batch.cols[2].isNull[0]);
for (int r = 0; r < batch.size; ++r) {
assertEquals("row " + r, r * 42, ((LongColumnVector) batch.cols[0]).vector[r]);
assertEquals("row " + r, r * 10001, lcv.vector[r]);
}
assertEquals(false, rows.nextBatch(batch));
rows.close();
}
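The seven-entry include vector in this test is indexed by the flattened, pre-order column ids of the reader schema (the root struct is id 0, so a=1, b=2, b.c=3, b.future1=4, d=5, future2=6). A small sketch that prints those ids so the mapping can be checked; the class name is mine, while fromString, getId, getMaximumId, and getChildren are standard org.apache.orc.TypeDescription API:

import org.apache.orc.TypeDescription;

public class IncludeIdSketch {
  public static void main(String[] args) {
    TypeDescription readerSchema = TypeDescription.fromString(
        "struct<a:int,b:struct<c:int,future1:int>,d:string,future2:int>");
    // include[] needs getMaximumId() + 1 slots: ids 0..6 for this schema.
    System.out.println("slots = " + (readerSchema.getMaximumId() + 1));
    for (TypeDescription child : readerSchema.getChildren()) {
      // Each top-level field covers the id range [getId(), getMaximumId()];
      // b spans 2..4 because its children c and future1 take ids 3 and 4.
      System.out.println(child.getId() + ".." + child.getMaximumId());
    }
  }
}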