Use of org.apache.drill.exec.vector.accessor.TupleReader in project drill by apache.
The class MetadataControllerBatch, method getFileMetadata.
private FileMetadata getFileMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics,
    Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
  List<String> segmentColumns = popConfig.getContext().segmentColumns();
  String segmentKey = segmentColumns.size() > 0
      ? reader.column(segmentColumns.iterator().next()).scalar().getString()
      : MetadataInfo.DEFAULT_SEGMENT_KEY;
  List<String> partitionValues = segmentColumns.stream()
      .limit(nestingLevel - 1)
      .map(columnName -> reader.column(columnName).scalar().getString())
      .collect(Collectors.toList());
  Path path = new Path(reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD).scalar().getString());
  String metadataIdentifier = MetadataIdentifierUtils.getFileMetadataIdentifier(partitionValues, path);
  MetadataInfo metadataInfo = MetadataInfo.builder()
      .type(MetadataType.FILE)
      .key(segmentKey)
      .identifier(StringUtils.defaultIfEmpty(metadataIdentifier, null))
      .build();
  return FileMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(metadataInfo)
      .columnsStatistics(columnStatistics)
      .metadataStatistics(metadataStatistics)
      .path(path)
      .lastModifiedTime(Long.parseLong(
          reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString()))
      .schema(TupleMetadata.of(
          reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString()))
      .build();
}
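Worth calling out is the access chain this method leans on throughout: look up a column by name on the TupleReader, narrow the resulting object reader to a scalar, and read the cell as a string. A minimal sketch of that chain, using only calls that appear above (the helper name readLocation is hypothetical, and the reader is assumed to be positioned on a valid row):

// Hypothetical helper showing the TupleReader access chain used in
// getFileMetadata(): column lookup by name, narrowed to a scalar read.
private Path readLocation(TupleReader reader) {
  String location = reader
      .column(MetastoreAnalyzeConstants.LOCATION_FIELD) // object reader for the column
      .scalar()                                         // narrow to a scalar reader
      .getString();                                     // read the cell as a String
  return new Path(location);                            // parse into a Hadoop Path
}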
Use of org.apache.drill.exec.vector.accessor.TupleReader in project drill by apache.
The class TestVariantAccessors, method testUnionWithMap.
/**
* Test a variant (AKA "union vector") at the top level which
* includes a map.
*/
@Test
public void testUnionWithMap() {
  final TupleMetadata schema = new SchemaBuilder()
      .addUnion("u")
        .addType(MinorType.VARCHAR)
        .addMap()
          .addNullable("a", MinorType.INT)
          .addNullable("b", MinorType.VARCHAR)
          .resumeUnion()
      .resumeSchema()
      .buildSchema();
  SingleRowSet result;

  // Write values
  {
    final ExtendableRowSet rs = fixture.rowSet(schema);
    final RowSetWriter writer = rs.writer();

    // Sanity check of writer structure
    final ObjectWriter wo = writer.column(0);
    assertEquals(ObjectType.VARIANT, wo.type());
    final VariantWriter vw = wo.variant();
    assertTrue(vw.hasType(MinorType.VARCHAR));
    final ObjectWriter strObj = vw.member(MinorType.VARCHAR);
    final ScalarWriter strWriter = strObj.scalar();
    assertSame(strWriter, vw.scalar(MinorType.VARCHAR));
    assertTrue(vw.hasType(MinorType.MAP));
    final ObjectWriter mapObj = vw.member(MinorType.MAP);
    final TupleWriter mWriter = mapObj.tuple();
    assertSame(mWriter, vw.tuple());
    final ScalarWriter aWriter = mWriter.scalar("a");
    final ScalarWriter bWriter = mWriter.scalar("b");

    // First row: string "first"
    vw.setType(MinorType.VARCHAR);
    strWriter.setString("first");
    writer.save();

    // Second row: a map
    vw.setType(MinorType.MAP);
    aWriter.setInt(20);
    bWriter.setString("fred");
    writer.save();

    // Third row: null
    vw.setNull();
    writer.save();

    // Fourth row: map with a null string
    vw.setType(MinorType.MAP);
    aWriter.setInt(40);
    bWriter.setNull();
    writer.save();

    // Fifth row: string "last"
    vw.setType(MinorType.VARCHAR);
    strWriter.setString("last");
    writer.save();

    result = writer.done();
    assertEquals(5, result.rowCount());
  }

  // Read the values.
  {
    final RowSetReader reader = result.reader();

    // Sanity check of structure
    final ObjectReader ro = reader.column(0);
    assertEquals(ObjectType.VARIANT, ro.type());
    final VariantReader vr = ro.variant();
    assertTrue(vr.hasType(MinorType.VARCHAR));
    final ObjectReader strObj = vr.member(MinorType.VARCHAR);
    final ScalarReader strReader = strObj.scalar();
    assertSame(strReader, vr.scalar(MinorType.VARCHAR));
    assertTrue(vr.hasType(MinorType.MAP));
    final ObjectReader mapObj = vr.member(MinorType.MAP);
    final TupleReader mReader = mapObj.tuple();
    assertSame(mReader, vr.tuple());
    final ScalarReader aReader = mReader.scalar("a");
    final ScalarReader bReader = mReader.scalar("b");

    // First row: string "first"
    assertTrue(reader.next());
    assertFalse(vr.isNull());
    assertEquals(MinorType.VARCHAR, vr.dataType());
    assertFalse(strReader.isNull());
    assertTrue(mReader.isNull());
    assertEquals("first", strReader.getString());

    // Second row: a map
    assertTrue(reader.next());
    assertFalse(vr.isNull());
    assertEquals(MinorType.MAP, vr.dataType());
    assertTrue(strReader.isNull());
    assertFalse(mReader.isNull());
    assertFalse(aReader.isNull());
    assertEquals(20, aReader.getInt());
    assertFalse(bReader.isNull());
    assertEquals("fred", bReader.getString());

    // Third row: null
    assertTrue(reader.next());
    assertTrue(vr.isNull());
    assertTrue(strReader.isNull());
    assertTrue(mReader.isNull());
    assertTrue(aReader.isNull());
    assertTrue(bReader.isNull());

    // Fourth row: map with a null string
    assertTrue(reader.next());
    assertEquals(MinorType.MAP, vr.dataType());
    assertEquals(40, aReader.getInt());
    assertTrue(bReader.isNull());

    // Fifth row: string "last"
    assertTrue(reader.next());
    assertEquals(MinorType.VARCHAR, vr.dataType());
    assertEquals("last", strReader.getString());
    assertFalse(reader.next());
  }
  result.clear();
}
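Stripped of its assertions, the per-row write protocol the test exercises reduces to three steps: declare the row's member type on the VariantWriter, write through that member's writer, then save the row. A minimal sketch reusing the writers declared in the test (values are illustrative):

// Row whose variant holds a VARCHAR: declare the type, write, save.
vw.setType(MinorType.VARCHAR);
strWriter.setString("value");
writer.save();

// Row whose variant holds the map: declare the type, fill the map's
// columns through their writers, then save.
vw.setType(MinorType.MAP);
aWriter.setInt(1);
bWriter.setString("x");
writer.save();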
Use of org.apache.drill.exec.vector.accessor.TupleReader in project drill by apache.
The class TestRowSet, method example.
/**
* The code below is not a test. Rather, it is a simple example of
* how to write a batch of data using writers, then read it using
* readers.
*/
@Test
public void example() {
  // Step 1: Define a schema. In a real app, this
  // will be provided by a reader, by an incoming batch,
  // etc.
  final TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.VARCHAR)
      .addArray("b", MinorType.INT)
      .addMap("c")
        .add("c1", MinorType.INT)
        .add("c2", MinorType.VARCHAR)
        .resumeSchema()
      .buildSchema();

  // Step 2: Create a batch. Done here because this is
  // a batch-oriented test. Done automatically in the
  // result set loader.
  final DirectRowSet drs = DirectRowSet.fromSchema(fixture.allocator(), schema);

  // Step 3: Create the writer.
  final RowSetWriter writer = drs.writer();

  // Step 4: Populate data. Here we do it the way an app would:
  // using the individual accessors. See tests above for the many
  // ways this can be done depending on the needs of the app.
  //
  // Write two rows:
  // ("fred", [10, 11], {12, "wilma"})
  // ("barney", [20, 21], {22, "betty"})
  //
  // This example uses Java strings for Varchar. Real code might
  // use byte arrays.
  writer.scalar("a").setString("fred");
  final ArrayWriter bWriter = writer.array("b");
  bWriter.scalar().setInt(10);
  bWriter.scalar().setInt(11);
  final TupleWriter cWriter = writer.tuple("c");
  cWriter.scalar("c1").setInt(12);
  cWriter.scalar("c2").setString("wilma");
  writer.save();

  writer.scalar("a").setString("barney");
  bWriter.scalar().setInt(20);
  bWriter.scalar().setInt(21);
  cWriter.scalar("c1").setInt(22);
  cWriter.scalar("c2").setString("betty");
  writer.save();

  // Step 5: "Harvest" the batch. Done differently in the
  // result set loader.
  final SingleRowSet rowSet = writer.done();

  // Step 6: Create a reader and iterate over the rows.
  final RowSetReader reader = rowSet.reader();
  while (reader.next()) {
    final StringBuilder sb = new StringBuilder();
    sb.append(print(reader.scalar("a").getString()));
    final ArrayReader bReader = reader.array("b");
    while (bReader.next()) {
      sb.append(print(bReader.scalar().getInt()));
    }
    final TupleReader cReader = reader.tuple("c");
    sb.append(print(cReader.scalar("c1").getInt()));
    sb.append(print(cReader.scalar("c2").getString()));
    logger.debug(sb.toString());
  }

  // Step 7: Free memory.
  rowSet.clear();
}
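The read loop in Step 6 doubles as the canonical traversal pattern: RowSetReader.next() advances rows while ArrayReader.next() advances elements within the current row's array. A minimal sketch of the same traversal that collects values instead of logging them (assuming a freshly created reader; names are illustrative):

// Collect every element of column "b" across all rows.
final List<Integer> bValues = new ArrayList<>();
while (reader.next()) {
  final ArrayReader bReader = reader.array("b");
  while (bReader.next()) {
    bValues.add(bReader.scalar().getInt());
  }
}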
Use of org.apache.drill.exec.vector.accessor.TupleReader in project drill by apache.
The class TestMapAccessors, method testNestedMapsRequired.
/**
* Create nested maps. Use required, variable-width columns since
* those require the most processing and are most likely to
* fail if anything is out of place.
*/
@Test
public void testNestedMapsRequired() {
  final TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addMap("m1")
        .add("b", MinorType.VARCHAR)
        .addMap("m2")
          .add("c", MinorType.VARCHAR)
          .resumeMap()
        .add("d", MinorType.VARCHAR)
        .resumeSchema()
      .buildSchema();
  RowSetBuilder builder = fixture.rowSetBuilder(schema);
  RowSetWriter rootWriter = builder.writer();
  rootWriter.addRow(10, mapValue("b1", mapValue("c1"), "d1"));
  rootWriter.addRow(20, mapValue("b2", mapValue("c2"), "d2"));

  // Validate directly
  RowSet result = builder.build();
  RowSetReader rootReader = result.reader();
  TupleReader m1Reader = rootReader.tuple("m1");
  TupleReader m2Reader = m1Reader.tuple("m2");
  rootReader.next();
  assertEquals(10, rootReader.scalar("a").getInt());
  assertEquals("b1", m1Reader.scalar("b").getString());
  assertEquals("c1", m2Reader.scalar("c").getString());
  assertEquals("d1", m1Reader.scalar("d").getString());
  rootReader.next();
  assertEquals(20, rootReader.scalar("a").getInt());
  assertEquals("b2", m1Reader.scalar("b").getString());
  assertEquals("c2", m2Reader.scalar("c").getString());
  assertEquals("d2", m1Reader.scalar("d").getString());

  // Validate with convenience methods
  RowSet expected = fixture.rowSetBuilder(schema)
      .addRow(10, mapValue("b1", mapValue("c1"), "d1"))
      .addRow(20, mapValue("b2", mapValue("c2"), "d2"))
      .build();
  new RowSetComparison(expected).verify(result);

  // Test that the row set rebuilds its internal structure from
  // a vector container.
  RowSet wrapped = fixture.wrap(result.container());
  RowSetUtilities.verify(expected, wrapped);
}
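Note that m1Reader and m2Reader are obtained once, before the first next() call, and then track the row position automatically; nested tuple readers never need to be re-fetched per row. A short sketch of the same idea in loop form (assuming a freshly created reader; variable names are illustrative):

// Nested TupleReaders advance with their parent reader, so they can be
// captured once and reused across rows.
TupleReader m1 = rootReader.tuple("m1");
TupleReader m2 = m1.tuple("m2");
while (rootReader.next()) {
  String b = m1.scalar("b").getString();
  String c = m2.scalar("c").getString();
  // ... use b and c
}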
Use of org.apache.drill.exec.vector.accessor.TupleReader in project drill by apache.
The class TestRowSet, method testDictStructureMapValue.
@Test
public void testDictStructureMapValue() {
  final String dictName = "d";
  final int bScale = 1;
  final TupleMetadata schema = new SchemaBuilder()
      .add("id", MinorType.INT)
      .addDict(dictName, MinorType.INT)
        .mapValue()
          .add("a", MinorType.INT)
          .add("b", MinorType.VARDECIMAL, 8, bScale)
          .resumeDict()
        .resumeSchema()
      .buildSchema();
  final ExtendableRowSet rowSet = fixture.rowSet(schema);
  final RowSetWriter writer = rowSet.writer();

  // Dict with a Map value
  assertEquals(ObjectType.ARRAY, writer.column(dictName).type());
  final ScalarWriter idWriter = writer.scalar(0);
  final DictWriter dictWriter = writer.column(1).dict();
  assertEquals(ValueType.INTEGER, dictWriter.keyType());
  assertEquals(ObjectType.TUPLE, dictWriter.valueType());
  final ScalarWriter keyWriter = dictWriter.keyWriter();
  final TupleWriter valueWriter = dictWriter.valueWriter().tuple();
  assertEquals(ValueType.INTEGER, keyWriter.valueType());
  ScalarWriter aWriter = valueWriter.scalar("a");
  ScalarWriter bWriter = valueWriter.scalar("b");
  assertEquals(ValueType.INTEGER, aWriter.valueType());
  assertEquals(ValueType.DECIMAL, bWriter.valueType());

  // Write data
  idWriter.setInt(1);
  keyWriter.setInt(11);
  aWriter.setInt(10);
  bWriter.setDecimal(BigDecimal.valueOf(1));
  // Advance to the next entry position
  dictWriter.save();
  keyWriter.setInt(12);
  aWriter.setInt(11);
  bWriter.setDecimal(BigDecimal.valueOf(2));
  dictWriter.save();
  writer.save();

  idWriter.setInt(2);
  keyWriter.setInt(21);
  aWriter.setInt(20);
  bWriter.setDecimal(BigDecimal.valueOf(3));
  dictWriter.save();
  writer.save();

  idWriter.setInt(3);
  keyWriter.setInt(31);
  aWriter.setInt(30);
  bWriter.setDecimal(BigDecimal.valueOf(4));
  dictWriter.save();
  keyWriter.setInt(32);
  aWriter.setInt(31);
  bWriter.setDecimal(BigDecimal.valueOf(5));
  dictWriter.save();
  keyWriter.setInt(33);
  aWriter.setInt(32);
  bWriter.setDecimal(BigDecimal.valueOf(6));
  dictWriter.save();
  writer.save();

  // Finish the row set and get a reader.
  final SingleRowSet actual = writer.done();
  final RowSetReader reader = actual.reader();

  // Verify the reader structure
  assertEquals(ObjectType.ARRAY, reader.column(dictName).type());
  final DictReader dictReader = reader.dict(1);
  assertEquals(ObjectType.ARRAY, dictReader.type());
  assertEquals(ValueType.INTEGER, dictReader.keyColumnType());
  assertEquals(ObjectType.TUPLE, dictReader.valueColumnType());
  final KeyAccessor keyAccessor = dictReader.keyAccessor();
  final TupleReader valueReader = dictReader.valueReader().tuple();

  // Row 1: get the value reader with its position set to the entry
  // corresponding to a key.
  assertTrue(reader.next());
  // The dict itself is not null.
  assertFalse(dictReader.isNull());
  assertTrue(keyAccessor.find(12));
  assertEquals(11, valueReader.scalar("a").getInt());
  assertEquals(BigDecimal.valueOf(2.0), valueReader.scalar("b").getDecimal());
  // MapReader#getObject() returns a List containing the values for each
  // column rather than a mapping of column name to its value, hence a List
  // is expected for the dict's value.
  Map<Object, Object> map = map(
      11, Arrays.asList(10, BigDecimal.valueOf(1.0)),
      12, Arrays.asList(11, BigDecimal.valueOf(2.0)));
  assertEquals(map, dictReader.getObject());

  // Row 2
  assertTrue(reader.next());
  assertFalse(keyAccessor.find(222));
  assertTrue(keyAccessor.find(21));
  assertEquals(Arrays.asList(20, BigDecimal.valueOf(3.0)), valueReader.getObject());
  map = map(21, Arrays.asList(20, BigDecimal.valueOf(3.0)));
  assertEquals(map, dictReader.getObject());

  // Row 3
  assertTrue(reader.next());
  assertTrue(keyAccessor.find(32));
  assertFalse(valueReader.isNull());
  assertEquals(31, valueReader.scalar("a").getInt());
  assertEquals(BigDecimal.valueOf(5.0), valueReader.scalar("b").getDecimal());
  assertTrue(keyAccessor.find(31));
  assertEquals(30, valueReader.scalar("a").getInt());
  assertEquals(BigDecimal.valueOf(4.0), valueReader.scalar("b").getDecimal());
  assertFalse(keyAccessor.find(404));
  map = map(
      31, Arrays.asList(30, BigDecimal.valueOf(4.0)),
      32, Arrays.asList(31, BigDecimal.valueOf(5.0)),
      33, Arrays.asList(32, BigDecimal.valueOf(6.0)));
  assertEquals(map, dictReader.getObject());
  assertFalse(reader.next());

  // Verify that the dict accessor's value count was set.
  final DictVector dictVector = (DictVector) actual.container().getValueVector(1).getValueVector();
  assertEquals(3, dictVector.getAccessor().getValueCount());

  final SingleRowSet expected = fixture.rowSetBuilder(schema)
      .addRow(1, map(
          11, objArray(10, BigDecimal.valueOf(1.0)),
          12, objArray(11, BigDecimal.valueOf(2.0))))
      .addRow(2, map(21, objArray(20, BigDecimal.valueOf(3.0))))
      .addRow(3, map(
          31, objArray(30, BigDecimal.valueOf(4.0)),
          32, objArray(31, BigDecimal.valueOf(5.0)),
          33, objArray(32, BigDecimal.valueOf(6.0))))
      .build();
  RowSetUtilities.verify(expected, actual);
}
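The keyAccessor.find() calls are the random-access half of the dict API exercised here: find() positions valueReader on the entry for the given key and returns whether that key exists in the current row. A minimal guarded-lookup sketch using the readers declared in the test (the key value is illustrative):

// Guarded dict lookup: read the value columns only if the key is present
// in the current row's dict.
if (keyAccessor.find(21)) {
  int a = valueReader.scalar("a").getInt();
  BigDecimal b = valueReader.scalar("b").getDecimal();
  // ... use a and b
}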