
Example 91 with RowSetLoader

Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.

From class TestResultSetLoaderUnions, method testSimpleListDynamic.

/**
 * Test a simple list created dynamically at load time.
 * The list must include a single type member.
 */
@Test
public void testSimpleListDynamic() {
    final ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
    final RowSetLoader writer = rsLoader.writer();
    // Can write a batch as if this were a repeated Varchar, except
    // that any value can also be null.
    rsLoader.startBatch();
    writer.addColumn(MaterializedField.create("id", Types.required(MinorType.INT)));
    final ColumnMetadata colSchema = MetadataUtils.newVariant("list", DataMode.REPEATED);
    colSchema.variantSchema().addType(MinorType.VARCHAR);
    colSchema.variantSchema().becomeSimple();
    writer.addColumn(colSchema);
    // Sanity check: should be an array of Varchar because we declared that
    // the set of types within the list is not expandable.
    final ArrayWriter arrWriter = writer.array("list");
    assertEquals(ObjectType.SCALAR, arrWriter.entryType());
    final ScalarWriter strWriter = arrWriter.scalar();
    assertEquals(ValueType.STRING, strWriter.valueType());
    writer.addRow(1, strArray("fred", "barney"))
        .addRow(2, null)
        .addRow(3, strArray("wilma", "betty", "pebbles"));
    // Verify
    final TupleMetadata schema = new SchemaBuilder()
        .add("id", MinorType.INT)
        .addList("list")
            .addType(MinorType.VARCHAR)
            .resumeSchema()
        .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(schema)
        .addRow(1, strArray("fred", "barney"))
        .addRow(2, null)
        .addRow(3, strArray("wilma", "betty", "pebbles"))
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(rsLoader.harvest()));
}
Also used : ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) ArrayWriter(org.apache.drill.exec.vector.accessor.ArrayWriter) ScalarWriter(org.apache.drill.exec.vector.accessor.ScalarWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
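
For orientation, here is a minimal sketch of the load-time lifecycle that all of these examples share, assuming the same SubOperatorTest fixture and using only calls that appear in the surrounding examples:

ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
RowSetLoader writer = rsLoader.writer();
// Columns may be declared up front or added dynamically mid-batch.
writer.addColumn(SchemaBuilder.columnSchema("name", MinorType.VARCHAR, DataMode.REQUIRED));
rsLoader.startBatch();
// addRow() takes one value per declared column and is chainable.
writer.addRow("fred").addRow("wilma");
// harvest() hands ownership of the completed batch to the caller.
RowSet result = fixture.wrap(rsLoader.harvest());
result.clear();
rsLoader.close();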

Example 92 with RowSetLoader

Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.

From class TestResultSetSchemaChange, method testSchemaChangeFirstBatch.

/**
 * Test the case where the schema changes in the first batch.
 * Schema changes before the first record are trivial and tested
 * elsewhere. Here we write some records, then add new columns, as a
 * JSON reader might do.
 */
@Test
public void testSchemaChangeFirstBatch() {
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
    RowSetLoader rootWriter = rsLoader.writer();
    rootWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED));
    // Create initial rows
    rsLoader.startBatch();
    int rowCount = 0;
    for (int i = 0; i < 2; i++) {
        rootWriter.start();
        rowCount++;
        rootWriter.scalar(0).setString("a_" + rowCount);
        rootWriter.save();
    }
    // Add a second column: nullable.
    rootWriter.addColumn(SchemaBuilder.columnSchema("b", MinorType.INT, DataMode.OPTIONAL));
    for (int i = 0; i < 2; i++) {
        rootWriter.start();
        rowCount++;
        rootWriter.scalar(0).setString("a_" + rowCount);
        rootWriter.scalar(1).setInt(rowCount);
        rootWriter.save();
    }
    // Add a third column. Use variable-width so that offset
    // vectors must be back-filled.
    rootWriter.addColumn(SchemaBuilder.columnSchema("c", MinorType.VARCHAR, DataMode.OPTIONAL));
    for (int i = 0; i < 2; i++) {
        rootWriter.start();
        rowCount++;
        rootWriter.scalar(0).setString("a_" + rowCount);
        rootWriter.scalar(1).setInt(rowCount);
        rootWriter.scalar(2).setString("c_" + rowCount);
        rootWriter.save();
    }
    // Fourth and fifth: a required Varchar (previous rows are back-filled
    // with empty strings) and a required int (back-filled with zeros).
    // Rarely useful, but it must work to prevent vector corruption if some
    // reader decides to go this route.
    rootWriter.addColumn(SchemaBuilder.columnSchema("d", MinorType.VARCHAR, DataMode.REQUIRED));
    rootWriter.addColumn(SchemaBuilder.columnSchema("e", MinorType.INT, DataMode.REQUIRED));
    for (int i = 0; i < 2; i++) {
        rootWriter.start();
        rowCount++;
        rootWriter.scalar(0).setString("a_" + rowCount);
        rootWriter.scalar(1).setInt(rowCount);
        rootWriter.scalar(2).setString("c_" + rowCount);
        rootWriter.scalar(3).setString("d_" + rowCount);
        rootWriter.scalar(4).setInt(rowCount * 10);
        rootWriter.save();
    }
    // Add an array. Now two offset vectors must be back-filled.
    rootWriter.addColumn(SchemaBuilder.columnSchema("f", MinorType.VARCHAR, DataMode.REPEATED));
    for (int i = 0; i < 2; i++) {
        rootWriter.start();
        rowCount++;
        rootWriter.scalar(0).setString("a_" + rowCount);
        rootWriter.scalar(1).setInt(rowCount);
        rootWriter.scalar(2).setString("c_" + rowCount);
        rootWriter.scalar(3).setString("d_" + rowCount);
        rootWriter.scalar(4).setInt(rowCount * 10);
        ScalarWriter arrayWriter = rootWriter.column(5).array().scalar();
        arrayWriter.setString("f_" + rowCount + "-1");
        arrayWriter.setString("f_" + rowCount + "-2");
        rootWriter.save();
    }
    // Harvest the batch and verify.
    RowSet actual = fixture.wrap(rsLoader.harvest());
    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("a", MinorType.VARCHAR)
        .addNullable("b", MinorType.INT)
        .addNullable("c", MinorType.VARCHAR)
        .add("d", MinorType.VARCHAR)
        .add("e", MinorType.INT)
        .addArray("f", MinorType.VARCHAR)
        .buildSchema();
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow("a_1", null, null, "", 0, strArray())
        .addRow("a_2", null, null, "", 0, strArray())
        .addRow("a_3", 3, null, "", 0, strArray())
        .addRow("a_4", 4, null, "", 0, strArray())
        .addRow("a_5", 5, "c_5", "", 0, strArray())
        .addRow("a_6", 6, "c_6", "", 0, strArray())
        .addRow("a_7", 7, "c_7", "d_7", 70, strArray())
        .addRow("a_8", 8, "c_8", "d_8", 80, strArray())
        .addRow("a_9", 9, "c_9", "d_9", 90, strArray("f_9-1", "f_9-2"))
        .addRow("a_10", 10, "c_10", "d_10", 100, strArray("f_10-1", "f_10-2"))
        .build();
    RowSetUtilities.verify(expected, actual);
    rsLoader.close();
}
Also used : SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) ScalarWriter(org.apache.drill.exec.vector.accessor.ScalarWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
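
The expected row set above encodes the back-fill rules: nullable columns back-fill with null, required Varchar columns with empty strings, required ints with zeros, and arrays with empty arrays. If only a few cells need checking, a RowSetReader can be used instead; a hedged sketch, not part of the original test (note that RowSetUtilities.verify releases both row sets, so this replaces the verify call rather than following it):

RowSetReader reader = actual.reader();
assertTrue(reader.next());                        // first row, written before any schema change
assertEquals("a_1", reader.scalar("a").getString());
assertTrue(reader.scalar("b").isNull());          // nullable INT back-filled with null
assertEquals("", reader.scalar("d").getString()); // required VARCHAR back-filled with ""
assertEquals(0, reader.scalar("e").getInt());     // required INT back-filled with 0
actual.clear();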

Example 93 with RowSetLoader

Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.

From class TestResultSetSchemaChange, method testSchemaChangeWithOverflow.

/**
 * Test a schema change on the row that overflows. If the
 * new column is added after overflow, it will appear as
 * a schema-change in the following batch. This is fine as
 * we are essentially time-shifting: pretending that the
 * overflow row was written in the next batch (which, in
 * fact, it is: that's what overflow means.)
 */
@Test
public void testSchemaChangeWithOverflow() {
    ResultSetOptions options = new ResultSetOptionBuilder().rowCountLimit(ValueVector.MAX_ROW_COUNT).build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    rootWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED));
    rsLoader.startBatch();
    byte[] value = new byte[512];
    Arrays.fill(value, (byte) 'X');
    int count = 0;
    while (!rootWriter.isFull()) {
        rootWriter.start();
        rootWriter.scalar(0).setBytes(value, value.length);
        if (rootWriter.isFull()) {
            rootWriter.addColumn(SchemaBuilder.columnSchema("b", MinorType.INT, DataMode.OPTIONAL));
            rootWriter.scalar(1).setInt(count);
            // Add a Varchar to ensure its offset fiddling is done properly
            rootWriter.addColumn(SchemaBuilder.columnSchema("c", MinorType.VARCHAR, DataMode.OPTIONAL));
            rootWriter.scalar(2).setString("c-" + count);
            // Allow adding a required column at this point.
            // (Not intuitively obvious that this should work; we back-fill
            // with zeros.)
            rootWriter.addColumn(SchemaBuilder.columnSchema("d", MinorType.INT, DataMode.REQUIRED));
        }
        rootWriter.save();
        count++;
    }
    // Result should include only the first column.
    SchemaBuilder schemaBuilder = new SchemaBuilder().add("a", MinorType.VARCHAR);
    BatchSchema expectedSchema = new BatchSchemaBuilder().withSchemaBuilder(schemaBuilder).build();
    RowSet result = fixture.wrap(rsLoader.harvest());
    assertTrue(result.batchSchema().isEquivalent(expectedSchema));
    assertEquals(count - 1, result.rowCount());
    result.clear();
    assertEquals(1, rsLoader.schemaVersion());
    // Double check: still can add a required column after
    // starting the next batch. (No longer in overflow state.)
    rsLoader.startBatch();
    rootWriter.addColumn(SchemaBuilder.columnSchema("e", MinorType.INT, DataMode.REQUIRED));
    // Next batch should start with the overflow row, including
    // the column added at the end of the previous batch, after
    // overflow.
    result = fixture.wrap(rsLoader.harvest());
    assertEquals(5, rsLoader.schemaVersion());
    assertEquals(1, result.rowCount());
    BatchSchemaBuilder batchSchemaBuilder = new BatchSchemaBuilder(expectedSchema);
    batchSchemaBuilder.schemaBuilder()
        .addNullable("b", MinorType.INT)
        .addNullable("c", MinorType.VARCHAR)
        .add("d", MinorType.INT)
        .add("e", MinorType.INT);
    expectedSchema = batchSchemaBuilder.build();
    assertTrue(result.batchSchema().isEquivalent(expectedSchema));
    RowSetReader reader = result.reader();
    reader.next();
    assertEquals(count - 1, reader.scalar(1).getInt());
    assertEquals("c-" + (count - 1), reader.scalar(2).getString());
    assertEquals(0, reader.scalar("d").getInt());
    assertEquals(0, reader.scalar("e").getInt());
    result.clear();
    rsLoader.close();
}
Also used : ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) BatchSchema(org.apache.drill.exec.record.BatchSchema) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) ResultSetOptions(org.apache.drill.exec.physical.resultSet.impl.ResultSetLoaderImpl.ResultSetOptions) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
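
The time-shifting described in the javadoc is what makes the standard writer loop safe: the row that trips isFull() is invisibly carried into the next batch, along with any columns added after overflow. A hedged sketch of that loop; readerHasMoreRows, loadOneRow, and sendDownstream are hypothetical placeholders, not Drill APIs:

while (readerHasMoreRows()) {                // hypothetical input source
    rsLoader.startBatch();
    RowSetLoader writer = rsLoader.writer();
    while (!writer.isFull() && readerHasMoreRows()) {
        writer.start();
        loadOneRow(writer);                  // hypothetical per-row population
        writer.save();                       // an overflow row lands in the next batch
    }
    // The harvested batch excludes the overflow row; it reappears as the
    // first row of the next batch, as verified in the test above.
    sendDownstream(fixture.wrap(rsLoader.harvest()));  // hypothetical consumer
}
rsLoader.close();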

Example 94 with RowSetLoader

Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.

From class TestLoad, method testLoadValueVectorEmptyVarCharArray.

@Test
public void testLoadValueVectorEmptyVarCharArray() throws Exception {
    try (BufferAllocator allocator = RootAllocatorFactory.newRoot(drillConfig)) {
        TupleMetadata schema = new SchemaBuilder().addArray("chars", MinorType.VARCHAR).build();
        ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder().readerSchema(schema).build();
        ResultSetLoader resultSetLoader = new ResultSetLoaderImpl(allocator, options);
        resultSetLoader.startBatch();
        RowSetLoader rowWriter = resultSetLoader.writer();
        rowWriter.addRow(new Object[] { null });
        VectorContainer harvest = resultSetLoader.harvest();
        // Collect the harvested vectors from the container
        List<ValueVector> vectors = StreamSupport.stream(harvest.spliterator(), false)
            .map(VectorWrapper::getValueVector)
            .collect(Collectors.toList());
        // Writeable batch now owns vector buffers
        WritableBatch writableBatch = WritableBatch.getBatchNoHV(1, vectors, false);
        // Serialize the vectors
        DrillBuf byteBuf = serializeBatch(allocator, writableBatch);
        // Batch loader does NOT take ownership of the serialized buffer
        RecordBatchLoader batchLoader = new RecordBatchLoader(allocator);
        batchLoader.load(writableBatch.getDef(), byteBuf);
        // Release the serialized buffer.
        byteBuf.release();
        assertEquals(1, batchLoader.getRecordCount());
        // Free the original vectors
        writableBatch.clear();
        // Free the deserialized vectors
        batchLoader.clear();
    }
}
Also used : ResultSetLoaderImpl(org.apache.drill.exec.physical.resultSet.impl.ResultSetLoaderImpl) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) BufferAllocator(org.apache.drill.exec.memory.BufferAllocator) VectorContainer(org.apache.drill.exec.record.VectorContainer) ResultSetOptionBuilder(org.apache.drill.exec.physical.resultSet.impl.ResultSetOptionBuilder) ValueVector(org.apache.drill.exec.vector.ValueVector) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) WritableBatch(org.apache.drill.exec.record.WritableBatch) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) DrillBuf(io.netty.buffer.DrillBuf) ExecTest(org.apache.drill.exec.ExecTest) Test(org.junit.Test) VectorTest(org.apache.drill.categories.VectorTest)
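
Note that serializeBatch is a helper local to TestLoad and is not shown here. Beyond the record count, the deserialized batch's schema can be checked through the standard RecordBatchLoader accessors; a hedged addition, not part of the original test:

BatchSchema loadedSchema = batchLoader.getSchema();
MaterializedField chars = loadedSchema.getColumn(0);
assertEquals("chars", chars.getName());
assertEquals(MinorType.VARCHAR, chars.getType().getMinorType());
assertEquals(DataMode.REPEATED, chars.getType().getMode());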

Example 95 with RowSetLoader

Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.

From class MetadataHandlerBatch, method writeMetadata.

private <T extends BaseMetadata & LocationProvider> VectorContainer writeMetadata(List<T> metadataList) {
    BaseMetadata firstElement = metadataList.iterator().next();
    ResultSetLoader resultSetLoader = getResultSetLoaderForMetadata(firstElement);
    resultSetLoader.startBatch();
    RowSetLoader rowWriter = resultSetLoader.writer();
    Iterator<T> segmentsIterator = metadataList.iterator();
    while (!rowWriter.isFull() && segmentsIterator.hasNext()) {
        T metadata = segmentsIterator.next();
        metadataToHandle.remove(metadata.getMetadataInfo().identifier());
        List<Object> arguments = new ArrayList<>();
        // adds required segment names to the arguments
        arguments.add(metadata.getPath().toUri().getPath());
        Collections.addAll(arguments,
            Arrays.copyOf(
                MetadataIdentifierUtils.getValuesFromMetadataIdentifier(metadata.getMetadataInfo().identifier()),
                popConfig.getContext().segmentColumns().size()));
        // adds column statistics values assuming that they are sorted in alphabetic order
        // (see getResultSetLoaderForMetadata() method)
        metadata.getColumnsStatistics().entrySet().stream()
            .sorted(Comparator.comparing(e -> e.getKey().toExpr()))
            .map(Map.Entry::getValue)
            .flatMap(columnStatistics -> AnalyzeColumnUtils.COLUMN_STATISTICS_FUNCTIONS.keySet().stream()
                .map(columnStatistics::get))
            .forEach(arguments::add);
        AnalyzeColumnUtils.META_STATISTICS_FUNCTIONS.keySet().stream()
            .map(metadata::getStatistic)
            .forEach(arguments::add);
        // collectedMap field value
        arguments.add(new Object[] {});
        if (metadataType == MetadataType.SEGMENT) {
            arguments.add(((SegmentMetadata) metadata).getLocations().stream()
                .map(path -> path.toUri().getPath())
                .toArray(String[]::new));
        }
        if (metadataType == MetadataType.ROW_GROUP) {
            arguments.add(String.valueOf(((RowGroupMetadata) metadata).getRowGroupIndex()));
            arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.START)));
            arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.LENGTH)));
        }
        arguments.add(metadata.getSchema().jsonString());
        arguments.add(String.valueOf(metadata.getLastModifiedTime()));
        arguments.add(metadataType.name());
        rowWriter.addRow(arguments.toArray());
    }
    return resultSetLoader.harvest();
}
Also used : AbstractSingleRecordBatch(org.apache.drill.exec.record.AbstractSingleRecordBatch) MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ResultSetLoaderImpl(org.apache.drill.exec.physical.resultSet.impl.ResultSetLoaderImpl) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) List(java.util.List) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) ResultSetOptionBuilder(org.apache.drill.exec.physical.resultSet.impl.ResultSetOptionBuilder) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) MaterializedField(org.apache.drill.exec.record.MaterializedField) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) Function(java.util.function.Function) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) ArrayList(java.util.ArrayList) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) StreamSupport(java.util.stream.StreamSupport) NONE(org.apache.drill.exec.record.RecordBatch.IterOutcome.NONE) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) Iterator(java.util.Iterator) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) MetadataHandlerPOP(org.apache.drill.exec.physical.config.MetadataHandlerPOP) LocationProvider(org.apache.drill.metastore.metadata.LocationProvider) VarCharVector(org.apache.drill.exec.vector.VarCharVector) Tables(org.apache.drill.metastore.components.tables.Tables) Comparator(java.util.Comparator) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) Collections(java.util.Collections)
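
One detail worth noting across Examples 94 and 95: RowSetLoader.addRow(Object...) spreads an array argument across the row's columns, which is exactly what rowWriter.addRow(arguments.toArray()) relies on above. For a row with a single column whose value is itself an array (or null), the array must be wrapped, or the single-column form used. A short illustrative sketch:

rowWriter.addRow(arguments.toArray());      // each element fills one column
rowWriter.addRow(new Object[] { null });    // single-column row whose value is null
rowWriter.addSingleCol(strArray("a", "b")); // one REPEATED column; avoids varargs spreading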

Aggregations

RowSetLoader (org.apache.drill.exec.physical.resultSet.RowSetLoader): 98
ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader): 90
Test (org.junit.Test): 86
SubOperatorTest (org.apache.drill.test.SubOperatorTest): 85
SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder): 82
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 82
SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet): 66
RowSet (org.apache.drill.exec.physical.rowSet.RowSet): 63
ScalarWriter (org.apache.drill.exec.vector.accessor.ScalarWriter): 25
TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter): 25
ResultSetOptions (org.apache.drill.exec.physical.resultSet.impl.ResultSetLoaderImpl.ResultSetOptions): 23
RowSetReader (org.apache.drill.exec.physical.rowSet.RowSetReader): 17
ArrayWriter (org.apache.drill.exec.vector.accessor.ArrayWriter): 16
VectorContainer (org.apache.drill.exec.record.VectorContainer): 15
SchemaPath (org.apache.drill.common.expression.SchemaPath): 12
DictWriter (org.apache.drill.exec.vector.accessor.DictWriter): 11
EvfTest (org.apache.drill.categories.EvfTest): 10
MaterializedField (org.apache.drill.exec.record.MaterializedField): 9
ColumnMetadata (org.apache.drill.exec.record.metadata.ColumnMetadata): 6
ArrayReader (org.apache.drill.exec.vector.accessor.ArrayReader): 5