Use of org.apache.drill.test.rowSet.RowSetReader in project drill by axbaretto.
The class TestFillEmpties, method doFillEmptiesScalar.
private void doFillEmptiesScalar(MajorType majorType) {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", majorType)
      .buildSchema();
  ExtendableRowSet rs = fixture.rowSet(schema);
  RowSetWriter writer = rs.writer();
  ScalarWriter colWriter = writer.scalar(0);
  ValueType valueType = colWriter.valueType();
  boolean nullable = majorType.getMode() == DataMode.OPTIONAL;
  // Write a value only in every fifth row; leave the others unset so the
  // writer must fill in the "empties".
  for (int i = 0; i < ROW_COUNT; i++) {
    if (i % 5 == 0) {
      colWriter.setObject(RowSetUtilities.testDataFromInt(valueType, majorType, i));
    }
    writer.save();
  }
  SingleRowSet result = writer.done();
  RowSetReader reader = result.reader();
  ScalarReader colReader = reader.scalar(0);
  MinorType type = majorType.getMinorType();
  boolean isVariable = (type == MinorType.VARCHAR ||
                        type == MinorType.VAR16CHAR ||
                        type == MinorType.VARBINARY);
  for (int i = 0; i < ROW_COUNT; i++) {
    assertTrue(reader.next());
    if (i % 5 != 0) {
      if (nullable) {
        // Nullable types fill with nulls.
        assertTrue(colReader.isNull());
        continue;
      }
      if (isVariable) {
        // Variable-width types fill with a zero-length value.
        assertEquals(0, colReader.getBytes().length);
        continue;
      }
    }
    // All other types fill with zero bytes, interpreted as some form
    // of zero for each type.
    Object actual = colReader.getObject();
    Object expected = RowSetUtilities.testDataFromInt(valueType, majorType, i % 5 == 0 ? i : 0);
    RowSetUtilities.assertEqualValues(
        majorType.toString().replace('\n', ' ') + "[" + i + "]",
        valueType, expected, actual);
  }
  result.clear();
}
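One plausible caller of this helper, shown only for orientation: the choice of a nullable INT is an illustrative assumption, while the MajorType builder is the standard Drill protobuf API.

  // Hypothetical invocation: exercise fill-empties for a nullable INT column.
  doFillEmptiesScalar(MajorType.newBuilder()
      .setMinorType(MinorType.INT)
      .setMode(DataMode.OPTIONAL)
      .build());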
Use of org.apache.drill.test.rowSet.RowSetReader in project drill by axbaretto.
The class JsonFileBuilder, method build.
public void build(File tableFile) throws IOException {
  tableFile.getParentFile().mkdirs();
  try (BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(tableFile))) {
    final RowSetReader reader = rowSet.reader();
    final int numCols = rowSet.batchSchema().getFieldCount();
    final Iterator<MaterializedField> fieldIterator = rowSet.batchSchema().iterator();
    final List<String> columnNames = Lists.newArrayList();
    final List<String> columnFormatters = Lists.newArrayList();

    // Build formatters from the schema: a custom formatter if one was
    // registered for the column, otherwise the default for the column's
    // value type.
    while (fieldIterator.hasNext()) {
      final String columnName = fieldIterator.next().getName();
      final ScalarReader columnReader = reader.scalar(columnName);
      final ValueType valueType = columnReader.valueType();
      final String columnFormatter;
      if (customFormatters.containsKey(columnName)) {
        columnFormatter = customFormatters.get(columnName);
      } else if (DEFAULT_FORMATTERS.containsKey(valueType)) {
        columnFormatter = DEFAULT_FORMATTERS.get(valueType);
      } else {
        final String message = String.format("Unsupported column type %s", valueType);
        throw new UnsupportedOperationException(message);
      }
      columnNames.add(columnName);
      columnFormatters.add(columnFormatter);
    }

    // Write each row as a single-line JSON object.
    final StringBuilder sb = new StringBuilder();
    String lineSeparator = "";
    for (int index = 0; index < rowSet.rowCount(); index++) {
      reader.next();
      sb.append(lineSeparator);
      sb.append('{');
      String separator = "";
      for (int columnIndex = 0; columnIndex < numCols; columnIndex++) {
        sb.append(separator);
        final String columnName = columnNames.get(columnIndex);
        final ScalarReader columnReader = reader.scalar(columnIndex);
        final String columnFormatter = columnFormatters.get(columnIndex);
        final Object columnObject = columnReader.getObject();
        final String columnString = String.format(columnFormatter, columnObject);
        sb.append('"').append(columnName).append('"').append(':').append(columnString);
        separator = ",";
      }
      sb.append('}');
      lineSeparator = "\n";
      os.write(sb.toString().getBytes());
      sb.delete(0, sb.length());
    }
  }
}
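A rough usage sketch: it assumes JsonFileBuilder takes the RowSet to serialize in its constructor (consistent with the rowSet.reader() call above); the file name and the testDir variable are made up for illustration.

  // Illustrative only: "rowSet" is a RowSet populated elsewhere in the test;
  // build(...) writes it out as one JSON object per line.
  File tableFile = new File(testDir, "example.json");
  new JsonFileBuilder(rowSet).build(tableFile);
  rowSet.clear();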
Use of org.apache.drill.test.rowSet.RowSetReader in project drill by axbaretto.
The class TestResultSetLoaderMaps, method testMapOverflowWithNewColumn.
/**
 * Test the case in which a new column is added during the overflow row. Unlike
 * the top-level schema case, the loader must internally create a copy of the
 * map and move vectors across only when the harvested result is to include the
 * schema version containing the target column. For overflow, the new column is
 * added after the first batch is harvested; it first appears in the second
 * batch, which contains the overflow row in which the column was added.
 */
@Test
public void testMapOverflowWithNewColumn() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addMap("m")
        .add("b", MinorType.INT)
        .add("c", MinorType.VARCHAR)
        .resumeSchema()
      .buildSchema();
  ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
      .setSchema(schema)
      .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  assertEquals(4, rsLoader.schemaVersion());
  RowSetLoader rootWriter = rsLoader.writer();

  // Can't use the shortcut to populate rows when doing a schema
  // change.
  ScalarWriter aWriter = rootWriter.scalar("a");
  TupleWriter mWriter = rootWriter.tuple("m");
  ScalarWriter bWriter = mWriter.scalar("b");
  ScalarWriter cWriter = mWriter.scalar("c");
  byte[] value = new byte[512];
  Arrays.fill(value, (byte) 'X');
  int count = 0;
  rsLoader.startBatch();
  while (!rootWriter.isFull()) {
    rootWriter.start();
    aWriter.setInt(count);
    bWriter.setInt(count * 10);
    cWriter.setBytes(value, value.length);
    if (rootWriter.isFull()) {
      // Overflow just occurred. Add another column.
      mWriter.addColumn(SchemaBuilder.columnSchema("d", MinorType.INT, DataMode.OPTIONAL));
      mWriter.scalar("d").setInt(count * 100);
    }
    rootWriter.save();
    count++;
  }

  // Result set should include the original columns, but not d.
  RowSet result = fixture.wrap(rsLoader.harvest());
  assertEquals(4, rsLoader.schemaVersion());
  assertTrue(schema.isEquivalent(result.schema()));
  BatchSchema expectedSchema = new BatchSchema(SelectionVectorMode.NONE, schema.toFieldList());
  assertTrue(expectedSchema.isEquivalent(result.batchSchema()));

  // Use a reader to validate row-by-row. Too large to create an expected
  // result set.
  RowSetReader reader = result.reader();
  TupleReader mapReader = reader.tuple("m");
  int rowId = 0;
  while (reader.next()) {
    assertEquals(rowId, reader.scalar("a").getInt());
    assertEquals(rowId * 10, mapReader.scalar("b").getInt());
    assertTrue(Arrays.equals(value, mapReader.scalar("c").getBytes()));
    rowId++;
  }
  result.clear();

  // Next batch should start with the overflow row
  rsLoader.startBatch();
  assertEquals(1, rootWriter.rowCount());
  result = fixture.wrap(rsLoader.harvest());
  assertEquals(1, result.rowCount());
  reader = result.reader();
  mapReader = reader.tuple("m");
  while (reader.next()) {
    assertEquals(rowId, reader.scalar("a").getInt());
    assertEquals(rowId * 10, mapReader.scalar("b").getInt());
    assertTrue(Arrays.equals(value, mapReader.scalar("c").getBytes()));
    assertEquals(rowId * 100, mapReader.scalar("d").getInt());
  }
  result.clear();
  rsLoader.close();
}
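The comment above about the result being "too large to create an expected result set" refers to the usual verification idiom in these tests, sketched below under stated assumptions: the RowSetBuilder and RowSetComparison helpers from the same rowSet test package, an addRow(...) builder method, and a small two-column expectedSchema with made-up values.

  // Minimal sketch of the expected-vs-actual pattern, practical only when the
  // result is small enough to spell out by hand.
  SingleRowSet expected = new RowSetBuilder(fixture.allocator(), expectedSchema)
      .addRow(1, "first")
      .addRow(2, "second")
      .build();
  new RowSetComparison(expected).verifyAndClearAll(actual);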
Use of org.apache.drill.test.rowSet.RowSetReader in project drill by axbaretto.
The class TestResultSetLoaderOmittedValues, method testOmittedValuesAtEndWithOverflow.
/**
 * Test "holes" at the end of a batch when the batch overflows. The completed
 * batch must be finalized correctly, and the new batch initialized correctly,
 * for the missing values.
 */
@Test
public void testOmittedValuesAtEndWithOverflow() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("b", MinorType.VARCHAR)
      .addNullable("c", MinorType.VARCHAR)
      .addNullable("d", MinorType.VARCHAR)
      .buildSchema();
  ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
      .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
      .setSchema(schema)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();

  // Fill the batch. Column d has some values. Column c is worst case: no values.
  rsLoader.startBatch();
  byte[] value = new byte[533];
  Arrays.fill(value, (byte) 'X');
  int rowNumber = 0;
  while (!rootWriter.isFull()) {
    rootWriter.start();
    rowNumber++;
    rootWriter.scalar(0).setInt(rowNumber);
    rootWriter.scalar(1).setBytes(value, value.length);
    if (rowNumber < 10_000) {
      rootWriter.scalar(3).setString("d-" + rowNumber);
    }
    rootWriter.save();
    assertEquals(rowNumber, rsLoader.totalRowCount());
  }

  // Harvest and verify
  RowSet result = fixture.wrap(rsLoader.harvest());
  assertEquals(rowNumber - 1, result.rowCount());
  RowSetReader reader = result.reader();
  int rowIndex = 0;
  while (reader.next()) {
    int expectedRowNumber = 1 + rowIndex;
    assertEquals(expectedRowNumber, reader.scalar(0).getInt());
    assertTrue(reader.scalar(2).isNull());
    if (expectedRowNumber < 10_000) {
      assertEquals("d-" + expectedRowNumber, reader.scalar(3).getString());
    } else {
      assertTrue(reader.scalar(3).isNull());
    }
    rowIndex++;
  }

  // Start count for this batch is one less than current
  // count, because of the overflow row.
  int startRowNumber = rowNumber;

  // Write a few more rows to the next batch
  rsLoader.startBatch();
  for (int i = 0; i < 10; i++) {
    rootWriter.start();
    rowNumber++;
    rootWriter.scalar(0).setInt(rowNumber);
    rootWriter.scalar(1).setBytes(value, value.length);
    if (i > 5) {
      rootWriter.scalar(3).setString("d-" + rowNumber);
    }
    rootWriter.save();
    assertEquals(rowNumber, rsLoader.totalRowCount());
  }

  // Verify that holes were preserved.
  result = fixture.wrap(rsLoader.harvest());
  assertEquals(rowNumber, rsLoader.totalRowCount());
  assertEquals(rowNumber - startRowNumber + 1, result.rowCount());
  // result.print();
  reader = result.reader();
  rowIndex = 0;
  while (reader.next()) {
    int expectedRowNumber = startRowNumber + rowIndex;
    assertEquals(expectedRowNumber, reader.scalar(0).getInt());
    assertTrue(reader.scalar(2).isNull());
    if (rowIndex > 6) {
      assertEquals("d-" + expectedRowNumber, reader.scalar(3).getString());
    } else {
      assertTrue("Row " + rowIndex + " col d should be null", reader.scalar(3).isNull());
    }
    rowIndex++;
  }
  assertEquals(11, rowIndex);
  rsLoader.close();
}
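To make the counting concrete: if the first batch fills up after N calls to save(), row N is the overflow row, so the harvested batch holds N - 1 rows while rsLoader.totalRowCount() still reports N. The second batch then begins with that carried-over row plus the 10 rows written above, which is why the final loop sees exactly 11 rows.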
Use of org.apache.drill.test.rowSet.RowSetReader in project drill by axbaretto.
The class TestResultSetLoaderOverflow, method testMissingArrayValues.
/**
 * Test the case in which an array has "missing values" before the overflow.
 */
@Test
public void testMissingArrayValues() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("b", MinorType.VARCHAR)
      .addArray("c", MinorType.INT)
      .buildSchema();
  ResultSetOptions options = new OptionBuilder()
      .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
      .setSchema(schema)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();
  byte[] value = new byte[512];
  Arrays.fill(value, (byte) 'X');
  int blankAfter = ValueVector.MAX_BUFFER_SIZE / 512 * 2 / 3;
  ScalarWriter cWriter = rootWriter.array("c").scalar();
  rsLoader.startBatch();
  int rowId = 0;
  // Write array values only for the first blankAfter rows; later rows, up to
  // and including the overflow row, leave column c empty.
  while (rootWriter.start()) {
    rootWriter.scalar("a").setInt(rowId);
    rootWriter.scalar("b").setBytes(value, value.length);
    if (rowId < blankAfter) {
      for (int i = 0; i < 3; i++) {
        cWriter.setInt(rowId * 3 + i);
      }
    }
    rootWriter.save();
    rowId++;
  }

  RowSet result = fixture.wrap(rsLoader.harvest());
  assertEquals(rowId - 1, result.rowCount());
  RowSetReader reader = result.reader();
  ScalarElementReader cReader = reader.array("c").elements();
  while (reader.next()) {
    assertEquals(reader.rowIndex(), reader.scalar("a").getInt());
    assertTrue(Arrays.equals(value, reader.scalar("b").getBytes()));
    if (reader.rowIndex() < blankAfter) {
      assertEquals(3, cReader.size());
      for (int i = 0; i < 3; i++) {
        assertEquals(reader.rowIndex() * 3 + i, cReader.getInt(i));
      }
    } else {
      assertEquals(0, cReader.size());
    }
  }
  result.clear();
  rsLoader.close();
}
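The arithmetic behind blankAfter: column b writes 512 bytes per row, so roughly ValueVector.MAX_BUFFER_SIZE / 512 rows fit before b forces the overflow; blankAfter is two-thirds of that, so the final third of rows leading up to (and including) the overflow row carry empty arrays in c, which the reader then sees as size 0.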