Use of org.apache.drill.exec.vector.accessor.TupleWriter in project drill by axbaretto.
From the class TestResultSetLoaderMaps, method testNestedMapsRequired.
/**
 * Verifies that columns can be appended to nested maps while a batch is
 * being written.
 * <p>
 * The test starts with the schema {@code (a, m1{b, m2{c}})}, writes and
 * validates one batch, then adds columns {@code d} and {@code f} to
 * {@code m1} and columns {@code e} and {@code g} to {@code m2} in the
 * middle of the second batch. All map members are required VARCHAR
 * columns: variable-width, required columns need the most processing
 * and are the most likely to fail if anything is out of place.
 */
@Test
public void testNestedMapsRequired() {
  final TupleMetadata initialSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addMap("m1")
        .add("b", MinorType.VARCHAR)
        .addMap("m2")
          .add("c", MinorType.VARCHAR)
          .resumeMap()
        .resumeSchema()
      .buildSchema();
  final ResultSetLoaderImpl.ResultSetOptions loaderOptions = new OptionBuilder()
      .setSchema(initialSchema)
      .build();
  final ResultSetLoader loader = new ResultSetLoaderImpl(fixture.allocator(), loaderOptions);

  // The test expects schema version 5 once the initial schema is in place.
  assertEquals(5, loader.schemaVersion());
  final RowSetLoader rowWriter = loader.writer();

  // First batch: a single row using only the initial columns.
  loader.startBatch();
  rowWriter.addRow(10, objArray("b1", objArray("c1")));

  // Check the first batch; the schema version must not have moved.
  final RowSet firstActual = fixture.wrap(loader.harvest());
  assertEquals(5, loader.schemaVersion());
  final SingleRowSet firstExpected = fixture.rowSetBuilder(initialSchema)
      .addRow(10, objArray("b1", objArray("c1")))
      .build();
  new RowSetComparison(firstExpected).verifyAndClearAll(firstActual);

  // Second batch: one row with the old schema, then grow both maps.
  loader.startBatch();
  rowWriter.addRow(20, objArray("b2", objArray("c2")));

  final TupleWriter outerMap = rowWriter.tuple("m1");
  outerMap.addColumn(SchemaBuilder.columnSchema("d", MinorType.VARCHAR, DataMode.REQUIRED));
  final TupleWriter innerMap = outerMap.tuple("m2");
  innerMap.addColumn(SchemaBuilder.columnSchema("e", MinorType.VARCHAR, DataMode.REQUIRED));
  rowWriter.addRow(30, objArray("b3", objArray("c3", "e3"), "d3"));

  // Grow both maps a second time while the same batch is still open.
  outerMap.addColumn(SchemaBuilder.columnSchema("f", MinorType.VARCHAR, DataMode.REQUIRED));
  innerMap.addColumn(SchemaBuilder.columnSchema("g", MinorType.VARCHAR, DataMode.REQUIRED));
  // Note: the value stored in column "f" for this row is the literal "e4".
  rowWriter.addRow(40, objArray("b4", objArray("c4", "e4", "g4"), "d4", "e4"));

  // Check the second batch. Four columns were added, and the test expects
  // the schema version to have advanced from 5 to 9.
  final RowSet secondActual = fixture.wrap(loader.harvest());
  assertEquals(9, loader.schemaVersion());

  final TupleMetadata evolvedSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addMap("m1")
        .add("b", MinorType.VARCHAR)
        .addMap("m2")
          .add("c", MinorType.VARCHAR)
          .add("e", MinorType.VARCHAR)
          .add("g", MinorType.VARCHAR)
          .resumeMap()
        .add("d", MinorType.VARCHAR)
        .add("f", MinorType.VARCHAR)
        .resumeSchema()
      .buildSchema();

  // Rows written before a column existed are expected to hold empty strings.
  final SingleRowSet secondExpected = fixture.rowSetBuilder(evolvedSchema)
      .addRow(20, objArray("b2", objArray("c2", "", ""), "", ""))
      .addRow(30, objArray("b3", objArray("c3", "e3", ""), "d3", ""))
      .addRow(40, objArray("b4", objArray("c4", "e4", "g4"), "d4", "e4"))
      .build();
  new RowSetComparison(secondExpected).verifyAndClearAll(secondActual);

  loader.close();
}
Use of org.apache.drill.exec.vector.accessor.TupleWriter in project drill by apache.
From the class TestResultSetLoaderProjection, method testStrictMapMemberProjectionWithSchema.
/**
 * With a strict provided schema, a map member is projected only when it
 * is named in the projection list and is also present in the provided
 * schema.
 */
@Test
public void testStrictMapMemberProjectionWithSchema() {
  // Projection list, and how each entry relates to the two schemas:
  //   m1.a - projected and in the provided schema
  //   m1.b - not projected at all
  //   m1.z - projected, in the provided schema, but not in the reader schema
  //   m2.c - projected, in the reader schema, but not in the provided schema
  //   m3.f - projected, but m3 is not in the provided schema
  //   m4.g - projected, in the provided schema, but not in the reader schema
  final List<SchemaPath> projection =
      RowSetTestUtils.projectList("m1.a", "m1.z", "m2.c", "m3.f", "m4.g");
  final ResultSetLoader loader = setupProvidedSchema(true, projection);
  final RowSetLoader rowWriter = loader.writer();
  final TupleMetadata writerSchema = rowWriter.tupleSchema();

  // m1: the map is projected, but of its two members only "a" is.
  final TupleWriter m1 = rowWriter.tuple("m1");
  assertTrue(m1.isProjected());
  assertEquals(2, m1.tupleSchema().size());
  assertTrue(m1.column("a").isProjected());
  assertFalse(m1.column("b").isProjected());

  // m2: the map is projected, yet neither member is.
  final TupleWriter m2 = rowWriter.tuple("m2");
  assertTrue(m2.isProjected());
  assertEquals(2, m2.tupleSchema().size());
  assertFalse(m2.column("c").isProjected());
  assertFalse(m2.column("d").isProjected());

  // m3: not projected at all, so none of its members are either.
  final TupleWriter m3 = rowWriter.tuple("m3");
  assertFalse(m3.isProjected());
  assertEquals(2, m3.tupleSchema().size());
  assertFalse(m3.column("e").isProjected());
  assertFalse(m3.column("f").isProjected());

  // m4 has no metadata entry in the writer schema.
  assertNull(writerSchema.metadata("m4"));

  // Only projected columns show up in the harvested batch: m1 carrying
  // just "a", and an m2 map with no members.
  // NOTE(review): the row data is presumably written by
  // setupProvidedSchema(); confirm against that helper.
  final TupleMetadata projectedSchema = new SchemaBuilder()
      .addMap("m1")
        .add("a", MinorType.INT)
        .resumeSchema()
      .addMap("m2")
        .resumeSchema()
      .build();
  final SingleRowSet expectedRows = fixture.rowSetBuilder(projectedSchema)
      .addRow(mapValue(1), mapValue())
      .addRow(mapValue(11), mapValue())
      .build();
  RowSetUtilities.verify(expectedRows, fixture.wrap(loader.harvest()));

  loader.close();
}
Use of org.apache.drill.exec.vector.accessor.TupleWriter in project drill by apache.
From the class TestResultSetLoaderProjection, method testNonStrictMapMemberProjectionWithSchema.
/**
 * With a non-strict provided schema, map members named in the projection
 * list are projected even when they (or their enclosing map) are absent
 * from the provided schema.
 */
@Test
public void testNonStrictMapMemberProjectionWithSchema() {
  // Projection list, and how each entry relates to the two schemas:
  //   m1   - not projected, though in the provided schema
  //   m2.c - projected, in the reader schema, but not in the provided
  //          schema; the schema is non-strict, so it is projected
  //   m2.d - projected and in both schemas
  //   m3.f - projected, but m3 is not in the provided schema; the schema
  //          is non-strict, so it is projected
  //   m4.g - projected, in the provided schema, but not in the reader schema
  final List<SchemaPath> projection =
      RowSetTestUtils.projectList("m2.c", "m2.d", "m3.f", "m4.g");
  final ResultSetLoader loader = setupProvidedSchema(false, projection);
  final RowSetLoader rowWriter = loader.writer();
  final TupleMetadata writerSchema = rowWriter.tupleSchema();

  // m1: the map and both of its members are unprojected.
  final TupleWriter m1 = rowWriter.tuple("m1");
  assertFalse(m1.isProjected());
  assertEquals(2, m1.tupleSchema().size());
  assertFalse(m1.column("a").isProjected());
  assertFalse(m1.column("b").isProjected());

  // m2: the map and both of its members are projected.
  final TupleWriter m2 = rowWriter.tuple("m2");
  assertTrue(m2.isProjected());
  assertEquals(2, m2.tupleSchema().size());
  assertTrue(m2.column("c").isProjected());
  assertTrue(m2.column("d").isProjected());

  // m3: the map is projected, but only member "f" is.
  final TupleWriter m3 = rowWriter.tuple("m3");
  assertTrue(m3.isProjected());
  assertEquals(2, m3.tupleSchema().size());
  assertFalse(m3.column("e").isProjected());
  assertTrue(m3.column("f").isProjected());

  // m4 has no metadata entry in the writer schema.
  assertNull(writerSchema.metadata("m4"));

  // Only projected columns show up in the harvested batch.
  // NOTE(review): the row data is presumably written by
  // setupProvidedSchema(); confirm against that helper.
  final TupleMetadata projectedSchema = new SchemaBuilder()
      .addMap("m2")
        .add("c", MinorType.INT)
        .add("d", MinorType.INT)
        .resumeSchema()
      .addMap("m3")
        .add("f", MinorType.INT)
        .resumeSchema()
      .build();
  final SingleRowSet expectedRows = fixture.rowSetBuilder(projectedSchema)
      .addRow(mapValue(3, 4), mapValue(6))
      .addRow(mapValue(13, 14), mapValue(16))
      .build();
  RowSetUtilities.verify(expectedRows, fixture.wrap(loader.harvest()));

  loader.close();
}
Use of org.apache.drill.exec.vector.accessor.TupleWriter in project drill by apache.
From the class TestResultSetLoaderProjection, method testMapProjectionMemberAndMap.
/**
 * Projects both a whole map and one of its members
 * ({@code SELECT m1, m1.b}) and checks that the entire map is projected;
 * the explicit member reference is redundant.
 */
@Test
public void testMapProjectionMemberAndMap() {
  // SELECT m1, m1.b -- really "all of m1"; m1.b is along for the ride.
  final List<SchemaPath> projection = RowSetTestUtils.projectList("m1", "m1.b");

  // "Early" reader schema that is consistent with the projection.
  final TupleMetadata readerSchema = new SchemaBuilder()
      .addMap("m1")
        .add("a", MinorType.INT)
        .add("b", MinorType.INT)
        .resumeSchema()
      .buildSchema();
  final ResultSetOptions loaderOptions = new ResultSetOptionBuilder()
      .projection(Projections.parse(projection))
      .readerSchema(readerSchema)
      .build();
  final ResultSetLoader loader = new ResultSetLoaderImpl(fixture.allocator(), loaderOptions);
  final RowSetLoader rowWriter = loader.writer();

  // The map and both of its members must be projected.
  final TupleMetadata writerSchema = rowWriter.tupleSchema();
  final ColumnMetadata m1Metadata = writerSchema.metadata("m1");
  final TupleWriter m1 = rowWriter.tuple("m1");
  assertTrue(m1Metadata.isMap());
  assertTrue(m1.isProjected());
  assertEquals(2, m1Metadata.tupleSchema().size());
  assertTrue(m1.column("a").isProjected());
  assertTrue(m1.column("b").isProjected());

  // Write two rows.
  loader.startBatch();
  rowWriter.start();
  rowWriter
      .addSingleCol(mapValue(1, 2))
      .addSingleCol(mapValue(11, 12));

  // The whole map appears in the result because the project list names
  // the map itself in addition to one of its members.
  final SingleRowSet expectedRows = fixture.rowSetBuilder(readerSchema)
      .addSingleCol(mapValue(1, 2))
      .addSingleCol(mapValue(11, 12))
      .build();
  RowSetUtilities.verify(expectedRows, fixture.wrap(loader.harvest()));

  loader.close();
}
Use of org.apache.drill.exec.vector.accessor.TupleWriter in project drill by apache.
From the class TestResultSetLoaderMapArray, method testCloseWithoutHarvest.
/**
 * Closes the loader while a batch is still active (written but never
 * harvested) to check that the loader releases the batch's memory.
 */
@Test
public void testCloseWithoutHarvest() {
  final TupleMetadata readerSchema = new SchemaBuilder()
      .addMapArray("m")
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .resumeSchema()
      .buildSchema();
  final ResultSetLoaderImpl.ResultSetOptions loaderOptions = new ResultSetOptionBuilder()
      .readerSchema(readerSchema)
      .rowCountLimit(ValueVector.MAX_ROW_COUNT)
      .build();
  final ResultSetLoader loader = new ResultSetLoaderImpl(fixture.allocator(), loaderOptions);
  final RowSetLoader rowWriter = loader.writer();
  final ArrayWriter mapArray = rowWriter.array("m");
  final TupleWriter mapEntry = mapArray.tuple();

  // Fill one batch: 40 rows, each with a three-element map array.
  final int rowCount = 40;
  final int entriesPerRow = 3;
  loader.startBatch();
  int row = 0;
  while (row < rowCount) {
    rowWriter.start();
    // Every entry in a row carries the same pair of values.
    final String label = "b-" + row;
    int entry = 0;
    while (entry < entriesPerRow) {
      mapEntry.scalar("a").setInt(row);
      mapEntry.scalar("b").setString(label);
      mapArray.save();
      entry++;
    }
    rowWriter.save();
    row++;
  }

  // Deliberately skip harvest(). The allocator will complain if the
  // loader does not release memory on close.
  loader.close();
}
Aggregations