Use of org.apache.drill.exec.vector.accessor.DictWriter in project drill by apache.
The class TestResultSetLoaderDictArray, method testCloseWithoutHarvest.
/**
* Test that memory is released if the loader is closed with an active
* batch (that is, before the batch is harvested).
*/
@Test
public void testCloseWithoutHarvest() {
  TupleMetadata schema = new SchemaBuilder()
      .addDictArray("d", MinorType.INT)
        .value(MinorType.VARCHAR)
        .resumeSchema()
      .buildSchema();
  ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder()
      .readerSchema(schema)
      .rowCountLimit(ValueVector.MAX_ROW_COUNT)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();
  ArrayWriter arrayWriter = rootWriter.array("d");
  DictWriter dictWriter = arrayWriter.dict();
  rsLoader.startBatch();
  for (int i = 0; i < 40; i++) {
    rootWriter.start();
    for (int j = 0; j < 3; j++) {
      dictWriter.keyWriter().setInt(i);
      dictWriter.valueWriter().scalar().setString("b-" + i);
      arrayWriter.save();
    }
    rootWriter.save();
  }
  // Don't harvest the batch. Allocator will complain if the
  // loader does not release memory.
  rsLoader.close();
}
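For contrast, a minimal sketch of the normal lifecycle, in which the caller harvests the batch and releases its memory; it reuses the fixture, loader, and writer names from the test above:

rsLoader.startBatch();
rootWriter.start();
dictWriter.keyWriter().setInt(1);
dictWriter.valueWriter().scalar().setString("b-1");
arrayWriter.save();
rootWriter.save();
// Harvesting transfers vector ownership to the returned container;
// clearing the wrapped row set is what actually frees the buffers.
RowSet result = fixture.wrap(rsLoader.harvest());
result.clear();
rsLoader.close();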
Use of org.apache.drill.exec.vector.accessor.DictWriter in project drill by apache.
The class TestResultSetLoaderDicts, method testKeyOverflow.
@Test
public void testKeyOverflow() {
  TupleMetadata schema = new SchemaBuilder()
      .addDict("d", MinorType.VARCHAR)
        .value(MinorType.INT)
        .resumeSchema()
      .buildSchema();
  ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder()
      .rowCountLimit(ValueVector.MAX_ROW_COUNT)
      .readerSchema(schema)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();
  rsLoader.startBatch();
  byte[] key = new byte[523];
  Arrays.fill(key, (byte) 'X');
  // Number of entries in each dict.
  int dictSize = 4;
  // Number of rows should be driven by vector size.
  // Our row count should include the overflow row.
  DictWriter dictWriter = rootWriter.dict(0);
  ScalarWriter keyWriter = dictWriter.keyWriter();
  ScalarWriter valueWriter = dictWriter.valueWriter().scalar();
  int expectedCount = ValueVector.MAX_BUFFER_SIZE / (key.length * dictSize);
  {
    int count = 0;
    while (!rootWriter.isFull()) {
      rootWriter.start();
      for (int i = 0; i < dictSize; i++) {
        keyWriter.setBytes(key, key.length);
        // Acts as a placeholder; the actual value is not important.
        valueWriter.setInt(0);
        // Not necessary for scalars, just for completeness.
        dictWriter.save();
      }
      rootWriter.save();
      count++;
    }
    assertEquals(expectedCount + 1, count);
    // Loader's row count should include only "visible" rows.
    assertEquals(expectedCount, rootWriter.rowCount());
    // Total count should include invisible and look-ahead rows.
    assertEquals(expectedCount + 1, rsLoader.totalRowCount());
    // Result should exclude the overflow row.
    VectorContainer container = rsLoader.harvest();
    BatchValidator.validate(container);
    RowSet result = fixture.wrap(container);
    assertEquals(expectedCount, result.rowCount());
    result.clear();
  }
  // Next batch should start with the overflow row.
  {
    rsLoader.startBatch();
    assertEquals(1, rootWriter.rowCount());
    assertEquals(expectedCount + 1, rsLoader.totalRowCount());
    VectorContainer container = rsLoader.harvest();
    BatchValidator.validate(container);
    RowSet result = fixture.wrap(container);
    assertEquals(1, result.rowCount());
    result.clear();
  }
  rsLoader.close();
}
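To make the expectedCount arithmetic concrete: each row writes dictSize keys of key.length bytes into the key vector, so the vector fills after roughly MAX_BUFFER_SIZE / (key.length * dictSize) rows. A rough worked example, assuming Drill's 16 MB vector limit (the exact MAX_BUFFER_SIZE value is an assumption here):

// Assuming MAX_BUFFER_SIZE is 16 MB (2^24 = 16_777_216 bytes):
int expectedCount = 16_777_216 / (523 * 4);  // 16_777_216 / 2092 = 8019 full rows
// The loop then writes 8019 visible rows plus one overflow row (8020 total).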
Use of org.apache.drill.exec.vector.accessor.DictWriter in project drill by apache.
The class TestResultSetLoaderProjection, method testDictProjection.
@Test
public void testDictProjection() {
  final String dictName1 = "d1";
  final String dictName2 = "d2";
  // There is no test for the case of obtaining a value by key, because that
  // is not as simple a projection as it is for a map: a value corresponding
  // to the key must be looked up (the functionality currently lives in
  // DictReader), and the final column schema would have to change from a
  // dict structure with `key` and `value` children to a simple `value`.
  List<SchemaPath> selection = RowSetTestUtils.projectList(dictName1);
  TupleMetadata schema = new SchemaBuilder()
      .addDict(dictName1, MinorType.VARCHAR)
        .value(MinorType.INT)
        .resumeSchema()
      .addDict(dictName2, MinorType.VARCHAR)
        .value(MinorType.INT)
        .resumeSchema()
      .buildSchema();
  ResultSetOptions options = new ResultSetOptionBuilder()
      .projection(Projections.parse(selection))
      .readerSchema(schema)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();
  // Verify the projected columns.
  TupleMetadata actualSchema = rootWriter.tupleSchema();
  ColumnMetadata dictMetadata1 = actualSchema.metadata(dictName1);
  DictWriter dictWriter1 = rootWriter.dict(dictName1);
  assertTrue(dictMetadata1.isDict());
  assertTrue(dictWriter1.isProjected());
  assertEquals(2, dictMetadata1.tupleSchema().size());
  assertTrue(dictWriter1.keyWriter().isProjected());
  assertTrue(dictWriter1.valueWriter().isProjected());
  ColumnMetadata dictMetadata2 = actualSchema.metadata(dictName2);
  DictWriter dictWriter2 = rootWriter.dict(dictName2);
  assertTrue(dictMetadata2.isDict());
  assertFalse(dictWriter2.isProjected());
  assertEquals(2, dictMetadata2.tupleSchema().size());
  assertFalse(dictWriter2.keyWriter().isProjected());
  assertFalse(dictWriter2.valueWriter().isProjected());
  // Write a couple of rows.
  rsLoader.startBatch();
  rootWriter.start();
  rootWriter
      .addRow(map("a", 1, "b", 2), map("c", 3, "d", 4))
      .addRow(map("a", 11, "b", 12), map("c", 13, "d", 14));
  // Verify. Only the projected columns appear in the result set.
  TupleMetadata expectedSchema = new SchemaBuilder()
      .addDict(dictName1, MinorType.VARCHAR)
        .value(MinorType.INT)
        .resumeSchema()
      .buildSchema();
  SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
      .addRow(map("a", 1, "b", 2))
      .addRow(map("a", 11, "b", 12))
      .build();
  RowSetUtilities.verify(expected, fixture.wrap(rsLoader.harvest()));
  rsLoader.close();
}
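Although d2 is unprojected, writing to it is still legal: the loader backs it with a dummy writer that discards the data. A minimal sketch, reusing dictWriter2 from the test above (such writes would have to happen between start() and save(), before harvest):

// dictWriter2 is a dummy writer: the calls succeed, but nothing is
// stored, and d2 never appears in the harvested batch.
dictWriter2.keyWriter().setString("ignored");
dictWriter2.valueWriter().scalar().setInt(0);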
Use of org.apache.drill.exec.vector.accessor.DictWriter in project drill by apache.
The class ObjectDictWriter, method buildDict.
public static ObjectDictWriter.DictObjectWriter buildDict(ColumnMetadata metadata,
    DictVector vector, List<AbstractObjectWriter> keyValueWriters) {
  DictEntryWriter.DictEntryObjectWriter entryObjectWriter =
      DictEntryWriter.buildDictEntryWriter(metadata, keyValueWriters, vector);
  DictWriter objectDictWriter;
  if (vector != null) {
    objectDictWriter = new ObjectDictWriter(metadata, vector.getOffsetVector(), entryObjectWriter);
  } else {
    objectDictWriter = new DummyDictWriter(metadata, entryObjectWriter);
  }
  return new ObjectDictWriter.DictObjectWriter(objectDictWriter);
}
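Passing a null vector selects the DummyDictWriter branch, which accepts the full writer protocol but stores nothing; the testDummyDict example below builds its writers exactly this way.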
Use of org.apache.drill.exec.vector.accessor.DictWriter in project drill by apache.
The class TestDummyWriter, method testDummyDict.
@Test
public void testDummyDict() {
  final String dictName = "d";
  final String dictArrayName = "da";
  TupleMetadata schema = new SchemaBuilder()
      .addDict(dictName, MinorType.INT)
        .repeatedValue(MinorType.VARCHAR)
        .resumeSchema()
      .addDictArray(dictArrayName, MinorType.VARCHAR)
        .value(MinorType.INT)
        .resumeSchema()
      .buildSchema();
  List<AbstractObjectWriter> writers = new ArrayList<>();
  final String keyFieldName = DictVector.FIELD_KEY_NAME;
  final String valueFieldName = DictVector.FIELD_VALUE_NAME;
  // Create key and value writers for the dict.
  ColumnMetadata dictMetadata = schema.metadata(dictName);
  TupleMetadata dictSchema = dictMetadata.tupleSchema();
  List<AbstractObjectWriter> dictFields = new ArrayList<>();
  dictFields.add(ColumnWriterFactory.buildColumnWriter(dictSchema.metadata(keyFieldName), null));
  dictFields.add(ColumnWriterFactory.buildColumnWriter(dictSchema.metadata(valueFieldName), null));
  writers.add(ObjectDictWriter.buildDict(dictMetadata, null, dictFields));
  // Create key and value writers for the dict array.
  ColumnMetadata dictArrayMetadata = schema.metadata(dictArrayName);
  TupleMetadata dictArraySchema = dictArrayMetadata.tupleSchema();
  List<AbstractObjectWriter> dictArrayFields = new ArrayList<>();
  dictArrayFields.add(ColumnWriterFactory.buildColumnWriter(dictArraySchema.metadata(keyFieldName), null));
  dictArrayFields.add(ColumnWriterFactory.buildColumnWriter(dictArraySchema.metadata(valueFieldName), null));
  writers.add(ObjectDictWriter.buildDictArray(dictArrayMetadata, null, dictArrayFields));
  AbstractTupleWriter rootWriter = new RootWriterFixture(schema, writers);
  // Events are ignored.
  rootWriter.startWrite();
  rootWriter.startRow();
  // Nothing is projected.
  DictWriter dictWriter = rootWriter.dict(dictName);
  assertFalse(dictWriter.isProjected());
  assertFalse(dictWriter.keyWriter().isProjected());
  assertFalse(dictWriter.valueWriter().array().scalar().isProjected());
  DictWriter dictWriter1 = rootWriter.array(dictArrayName).dict();
  assertFalse(dictWriter1.isProjected());
  assertFalse(dictWriter1.keyWriter().isProjected());
  assertFalse(dictWriter1.valueWriter().scalar().isProjected());
  // Dummy columns seem real.
  rootWriter.dict(dictName).keyWriter().setInt(20);
  rootWriter.dict(0).valueWriter().array().scalar().setString("foo");
  // Dummy array dict seems real.
  rootWriter.array(dictArrayName).dict().keyWriter().setString("foo");
  rootWriter.array(dictArrayName).dict().valueWriter().scalar().setInt(30);
  rootWriter.array(dictArrayName).save();
  rootWriter.array(1).dict().keyWriter().setString("bar");
  rootWriter.array(1).dict().valueWriter().scalar().setInt(40);
  rootWriter.array(1).save();
  // More ignored events.
  rootWriter.restartRow();
  rootWriter.saveRow();
  rootWriter.endWrite();
}
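Because every writer here was built with a null vector, no buffers are allocated and there is nothing to harvest; the test only verifies that the full write protocol (events, writes, and saves) is accepted without error.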