use of org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch in project incubator-gobblin by apache.
the class GenericRecordToOrcValueWriterTest method testUnionRecordConversionWriter.
/**
 * Writes the Avro records of {@code union_test/data.json} into an ORC row batch through
 * {@link GenericRecordToOrcValueWriter}, flushes the batch to a temporary ORC file, reads the file
 * back, and checks that every expected {@link OrcUnion} value is present.
 *
 * @throws Exception if the schema or data cannot be loaded, or the ORC file cannot be written or read
 */
@Test
public void testUnionRecordConversionWriter() throws Exception {
  // The test opens the schema resource, so the test is responsible for closing it.
  Schema schema;
  try (java.io.InputStream schemaStream =
      this.getClass().getClassLoader().getResourceAsStream("union_test/schema.avsc")) {
    schema = new Schema.Parser().parse(schemaStream);
  }

  TypeDescription orcSchema = AvroOrcSchemaConverter.getOrcSchema(schema);
  GenericRecordToOrcValueWriter valueWriter = new GenericRecordToOrcValueWriter(orcSchema, schema);
  VectorizedRowBatch rowBatch = orcSchema.createRowBatch();

  List<GenericRecord> recordList =
      GobblinOrcWriterTest.deserializeAvroRecords(this.getClass(), schema, "union_test/data.json");
  for (GenericRecord record : recordList) {
    valueWriter.write(record, rowBatch);
  }

  // Flush the row batch to disk.
  File tempFile = new File(Files.createTempDir(), "orc");
  tempFile.deleteOnExit();
  Path filePath = new Path(tempFile.getAbsolutePath());
  OrcFile.WriterOptions options = OrcFile.writerOptions(new Properties(), new Configuration());
  options.setSchema(orcSchema);
  Writer orcFileWriter = OrcFile.createWriter(filePath, options);
  try {
    orcFileWriter.addRowBatch(rowBatch);
  } finally {
    // Close the writer even when adding the batch fails, so the file handle is not leaked.
    orcFileWriter.close();
  }

  // Load it back and compare.
  FileSystem fs = FileSystem.get(new Configuration());
  List<Writable> orcRecords = deserializeOrcRecords(filePath, fs);
  Assert.assertEquals(orcRecords.size(), 5);

  // Every record read back is treated as a struct wrapping a single union field, so only the
  // union values are compared. Converting each GenericRecord into an OrcStruct for a full
  // comparison would be more thorough, but is non-trivial.
  List<OrcUnion> unionList =
      orcRecords.stream().map(this::getUnionFieldFromStruct).collect(Collectors.toList());

  // Build each expected OrcUnion and verify that it appears in unionList.
  TypeDescription unionSchema = orcSchema.getChildren().get(0);

  OrcUnion union_0 = new OrcUnion(unionSchema);
  union_0.set((byte) 0, new Text("urn:li:member:3"));
  Assert.assertTrue(unionList.contains(union_0));

  OrcUnion union_1 = new OrcUnion(unionSchema);
  union_1.set((byte) 0, new Text("urn:li:member:4"));
  Assert.assertTrue(unionList.contains(union_1));

  OrcUnion union_2 = new OrcUnion(unionSchema);
  union_2.set((byte) 1, new IntWritable(2));
  Assert.assertTrue(unionList.contains(union_2));

  OrcUnion union_3 = new OrcUnion(unionSchema);
  union_3.set((byte) 1, new IntWritable(1));
  Assert.assertTrue(unionList.contains(union_3));

  OrcUnion union_4 = new OrcUnion(unionSchema);
  union_4.set((byte) 1, new IntWritable(3));
  Assert.assertTrue(unionList.contains(union_4));
}
use of org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch in project incubator-gobblin by apache.
the class GenericRecordToOrcValueWriterTest method testDecimalRecordConversionWriter.
/**
 * Writes the Avro records of {@code decimal_test/data.json} into an ORC row batch through
 * {@link GenericRecordToOrcValueWriter}, flushes the batch to a temporary ORC file, reads the file
 * back, and checks the string form of the two resulting records.
 *
 * @throws Exception if the schema or data cannot be loaded, or the ORC file cannot be written or read
 */
@Test
public void testDecimalRecordConversionWriter() throws Exception {
  // The test opens the schema resource, so the test is responsible for closing it.
  Schema schema;
  try (java.io.InputStream schemaStream =
      this.getClass().getClassLoader().getResourceAsStream("decimal_test/schema.avsc")) {
    schema = new Schema.Parser().parse(schemaStream);
  }

  TypeDescription orcSchema = AvroOrcSchemaConverter.getOrcSchema(schema);
  GenericRecordToOrcValueWriter valueWriter = new GenericRecordToOrcValueWriter(orcSchema, schema);
  VectorizedRowBatch rowBatch = orcSchema.createRowBatch();

  List<GenericRecord> recordList =
      GobblinOrcWriterTest.deserializeAvroRecords(this.getClass(), schema, "decimal_test/data.json");
  for (GenericRecord record : recordList) {
    valueWriter.write(record, rowBatch);
  }

  // Flush the row batch to disk.
  File tempFile = new File(Files.createTempDir(), "orc");
  tempFile.deleteOnExit();
  Path filePath = new Path(tempFile.getAbsolutePath());
  OrcFile.WriterOptions options = OrcFile.writerOptions(new Properties(), new Configuration());
  options.setSchema(orcSchema);
  Writer orcFileWriter = OrcFile.createWriter(filePath, options);
  try {
    orcFileWriter.addRowBatch(rowBatch);
  } finally {
    // Close the writer even when adding the batch fails, so the file handle is not leaked.
    orcFileWriter.close();
  }

  // Load it back and compare.
  FileSystem fs = FileSystem.get(new Configuration());
  List<Writable> orcRecords = deserializeOrcRecords(filePath, fs);
  Assert.assertEquals(orcRecords.size(), 2);
  Assert.assertEquals(orcRecords.get(0).toString(), "{3.4}");
  Assert.assertEquals(orcRecords.get(1).toString(), "{5.97}");
}
use of org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch in project incubator-gobblin by apache.
the class GenericRecordToOrcValueWriterTest method testListResize.
/**
 * Writes the records of {@code list_map_test/data.json} into a deliberately small row batch and
 * checks how many times {@link GenericRecordToOrcValueWriter} reports having resized.
 *
 * @throws Exception if the schema or data cannot be loaded or a record cannot be written
 */
@Test
public void testListResize() throws Exception {
  // The test opens the schema resource, so the test is responsible for closing it.
  Schema schema;
  try (java.io.InputStream schemaStream =
      this.getClass().getClassLoader().getResourceAsStream("list_map_test/schema.avsc")) {
    schema = new Schema.Parser().parse(schemaStream);
  }

  TypeDescription orcSchema = AvroOrcSchemaConverter.getOrcSchema(schema);
  GenericRecordToOrcValueWriter valueWriter = new GenericRecordToOrcValueWriter(orcSchema, schema);

  // Make the batch size very small so that the enlarge behavior is easily triggered.
  // It still has to be larger than the number of records deserialized from data.json,
  // because the batch is never reset here.
  VectorizedRowBatch rowBatch = orcSchema.createRowBatch(10);

  List<GenericRecord> recordList =
      GobblinOrcWriterTest.deserializeAvroRecords(this.getClass(), schema, "list_map_test/data.json");
  Assert.assertEquals(recordList.size(), 6);
  for (GenericRecord record : recordList) {
    valueWriter.write(record, rowBatch);
  }

  // Examine the resize count: it should happen only once each for the map and the list, so 2 in total.
  Assert.assertEquals(valueWriter.resizeCount, 2);
}
Aggregations