Search in sources :

Example 1 with RepeatedListWriter

use of org.apache.drill.exec.vector.accessor.writer.RepeatedListWriter in project drill by apache.

the class ColumnBuilder method buildRepeatedList.

/**
 * Builds the column state for a repeated list (an array of arrays).
 * The list is created with a dummy element writer in place of the real
 * inner type; the column schema must therefore arrive without a child schema.
 *
 * @param parent container state that supplies the loader (and its allocator)
 * and the vector cache for the new column
 * @param columnSchema schema of the repeated list column; must be a
 * {@code REPEATED} {@code LIST} with no child schema
 * @return the column state wrapping the list writer, the list vector state
 * and the list (contents) state
 */
private ColumnState buildRepeatedList(ContainerState parent, ColumnMetadata columnSchema) {
    // Preconditions: a repeated LIST column whose element type has not been
    // declared. The element type is expected to be added only after the
    // repeated list writer has been created.
    assert columnSchema.type() == MinorType.LIST;
    assert columnSchema.mode() == DataMode.REPEATED;
    assert columnSchema.childSchema() == null;

    // Vector that backs the repeated list.
    final RepeatedListVector listVector = new RepeatedListVector(
            columnSchema.emptySchema(),
            parent.loader().allocator(),
            null);

    // The inner type is built incrementally later because it may be complex
    // (a map or another repeated list). Until then, a dummy writer over a
    // repeated NULL column stands in so callers need no special-casing.
    final ColumnMetadata placeholderSchema = new PrimitiveColumnMetadata(
            MaterializedField.create(columnSchema.name(), Types.repeated(MinorType.NULL)));
    final AbstractObjectWriter placeholderWriter =
            ColumnWriterFactory.buildDummyColumnWriter(placeholderSchema);

    // The list writer itself: an array of arrays.
    final AbstractObjectWriter listObjWriter =
            RepeatedListWriter.buildRepeatedList(columnSchema, listVector, placeholderWriter);

    // State object that tracks the lifecycle of the list vector.
    final RepeatedListVectorState listVectorState =
            new RepeatedListVectorState(listObjWriter, listVector);

    // Container state that tracks the contents of the array.
    final RepeatedListState contentState = new RepeatedListState(
            parent.loader(),
            parent.vectorCache().childCache(columnSchema.name()));

    // Register the contents state as the listener for list events.
    final RepeatedListWriter repeatedWriter = (RepeatedListWriter) listObjWriter.array();
    repeatedWriter.bindListener(contentState);

    // The column state ties the writer, vector state and contents state together.
    return new RepeatedListColumnState(
            parent.loader(), listObjWriter, listVectorState, contentState);
}
Also used : ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) PrimitiveColumnMetadata(org.apache.drill.exec.record.metadata.PrimitiveColumnMetadata) RepeatedListVector(org.apache.drill.exec.vector.complex.RepeatedListVector) RepeatedListWriter(org.apache.drill.exec.vector.accessor.writer.RepeatedListWriter) RepeatedListColumnState(org.apache.drill.exec.physical.resultSet.impl.RepeatedListState.RepeatedListColumnState) RepeatedListVectorState(org.apache.drill.exec.physical.resultSet.impl.RepeatedListState.RepeatedListVectorState) AbstractObjectWriter(org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter)

Example 2 with RepeatedListWriter

use of org.apache.drill.exec.vector.accessor.writer.RepeatedListWriter in project drill by apache.

the class TestResultSetLoaderRepeatedList method test2DLateSchemaIncremental.

/**
 * Exercises a 2D repeated list whose schema is discovered incrementally:
 * the {@code id} column first, then {@code list1} without an element type,
 * then the element type of {@code list1}, and finally {@code list2} with a
 * complete type. Rows are written between each step and the harvested batch
 * is compared against the expected row set.
 * <p>
 * The loader is closed in a {@code finally} block so that a failing
 * assertion does not leave the loader unclosed.
 */
@Test
public void test2DLateSchemaIncremental() {
    final TupleMetadata schema = new SchemaBuilder()
            .add("id", MinorType.INT)
            .addRepeatedList("list1")
                .addArray(MinorType.VARCHAR)
                .resumeSchema()
            .addRepeatedList("list2")
                .addArray(MinorType.VARCHAR)
                .resumeSchema()
            .buildSchema();
    final ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder().build();
    final ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    try {
        final RowSetLoader writer = rsLoader.writer();

        // Add columns dynamically
        writer.addColumn(schema.metadata(0));

        // Write a row without the array.
        rsLoader.startBatch();
        writer.addRow(1);

        // Add the repeated list, but without contents.
        writer.addColumn(schema.metadata(1).cloneEmpty());

        // Sanity check of writer structure
        assertEquals(2, writer.size());
        final ObjectWriter listObj = writer.column("list1");
        assertEquals(ObjectType.ARRAY, listObj.type());
        final ArrayWriter listWriter = listObj.array();

        // No child defined yet. A dummy child is inserted instead.
        assertEquals(MinorType.NULL, listWriter.entry().schema().type());
        assertEquals(ObjectType.ARRAY, listWriter.entryType());
        assertEquals(ObjectType.SCALAR, listWriter.array().entryType());
        assertEquals(ValueType.NULL, listWriter.array().scalar().valueType());

        // Although we don't know the type of the inner, we can still
        // create null (empty) elements in the outer array.
        writer
            .addRow(2, null)
            .addRow(3, objArray())
            .addRow(4, objArray(objArray(), null));

        // Define the inner type.
        final RepeatedListWriter listWriterImpl = (RepeatedListWriter) listWriter;
        listWriterImpl.defineElement(
                MaterializedField.create("list1", Types.repeated(MinorType.VARCHAR)));

        // Sanity check of completed structure
        assertEquals(ObjectType.ARRAY, listWriter.entryType());
        final ArrayWriter innerWriter = listWriter.array();
        assertEquals(ObjectType.SCALAR, innerWriter.entryType());
        final ScalarWriter strWriter = innerWriter.scalar();
        assertEquals(ValueType.STRING, strWriter.valueType());

        // Write values
        writer.addRow(5, objArray(strArray("a1", "b1"), strArray("c1", "d1")));

        // Add the second list, with a complete type
        writer.addColumn(schema.metadata(2));

        // Sanity check of writer structure
        assertEquals(3, writer.size());
        final ObjectWriter list2Obj = writer.column("list2");
        assertEquals(ObjectType.ARRAY, list2Obj.type());
        final ArrayWriter list2Writer = list2Obj.array();
        assertEquals(ObjectType.ARRAY, list2Writer.entryType());
        final ArrayWriter inner2Writer = list2Writer.array();
        assertEquals(ObjectType.SCALAR, inner2Writer.entryType());
        final ScalarWriter str2Writer = inner2Writer.scalar();
        assertEquals(ValueType.STRING, str2Writer.valueType());

        // Write values
        writer.addRow(6,
                objArray(strArray("a2", "b2"), strArray("c2", "d2")),
                objArray(strArray("w2", "x2"), strArray("y2", "z2")));

        // Verify the values. Rows written before a list column existed are
        // expected to show an empty array for that column.
        // (Relies on the row set level repeated list tests having passed.)
        final RowSet expected = fixture.rowSetBuilder(schema)
                .addRow(1, objArray(), objArray())
                .addRow(2, objArray(), objArray())
                .addRow(3, objArray(), objArray())
                .addRow(4, objArray(objArray(), null), objArray())
                .addRow(5, objArray(strArray("a1", "b1"), strArray("c1", "d1")), objArray())
                .addRow(6,
                        objArray(strArray("a2", "b2"), strArray("c2", "d2")),
                        objArray(strArray("w2", "x2"), strArray("y2", "z2")))
                .build();
        RowSetUtilities.verify(expected, fixture.wrap(rsLoader.harvest()));
    } finally {
        // Always close the loader, even when an assertion above fails.
        rsLoader.close();
    }
}
Also used : ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) RepeatedListWriter(org.apache.drill.exec.vector.accessor.writer.RepeatedListWriter) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) ObjectWriter(org.apache.drill.exec.vector.accessor.ObjectWriter) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) ArrayWriter(org.apache.drill.exec.vector.accessor.ArrayWriter) ScalarWriter(org.apache.drill.exec.vector.accessor.ScalarWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 3 with RepeatedListWriter

use of org.apache.drill.exec.vector.accessor.writer.RepeatedListWriter in project drill by apache.

the class TestResultSetLoaderRepeatedList method test2DLateSchema.

/**
 * Exercises a 2D repeated list whose columns are added to the loader after
 * it is created: the list column is first added without its element type,
 * then the element type is defined through the {@link RepeatedListWriter},
 * and the shared {@code do2DTest} routine runs against the result.
 * <p>
 * The loader is closed in a {@code finally} block so that a failure in the
 * setup or in {@code do2DTest} does not leave the loader unclosed.
 */
@Test
public void test2DLateSchema() {
    final TupleMetadata schema = new SchemaBuilder()
            .add("id", MinorType.INT)
            .addRepeatedList("list2")
                .addArray(MinorType.VARCHAR)
                .resumeSchema()
            .buildSchema();
    final ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder().build();
    final ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    try {
        final RowSetLoader writer = rsLoader.writer();

        // Add columns dynamically. The list is added without its element type.
        writer.addColumn(schema.metadata(0));
        writer.addColumn(schema.metadata(1).cloneEmpty());

        // Yes, this is ugly. The whole repeated array idea is awkward.
        // The only place it is used at present is in JSON where the
        // awkwardness is mixed in with JSON complexity.
        // Consider improving this API in the future.
        ((RepeatedListWriter) writer.array(1)).defineElement(schema.metadata(1).childSchema());

        do2DTest(schema, rsLoader);
    } finally {
        // Always close the loader, even when the test body fails.
        rsLoader.close();
    }
}
Also used : ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) RepeatedListWriter(org.apache.drill.exec.vector.accessor.writer.RepeatedListWriter) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 4 with RepeatedListWriter

use of org.apache.drill.exec.vector.accessor.writer.RepeatedListWriter in project drill by apache.

the class BuildFromSchema method buildRepeatedList.

/**
 * Expands a repeated list. The list may be multi-dimensional, meaning that
 * it may have many layers of other repeated lists before reaching the
 * element (inner-most) array.
 * <p>
 * The list column is first added to the parent with an empty copy of its
 * schema; if the schema declares a child, that child is then built on top
 * of the new list writer.
 *
 * @param parent tuple writer for the tuple that holds the array
 * @param colSchema schema definition of the array
 * @return the object writer returned by the parent for the new list column
 */
private ObjectWriter buildRepeatedList(ParentShim parent, ColumnMetadata colSchema) {
    final ObjectWriter listObjWriter = parent.add(colSchema.cloneEmpty());
    final RepeatedListWriter repeatedWriter = (RepeatedListWriter) listObjWriter.array();
    final ColumnMetadata elementSchema = colSchema.childSchema();

    // No declared element type: nothing further to build.
    if (elementSchema == null) {
        return listObjWriter;
    }

    // Build the element column with the list writer acting as its parent.
    buildColumn(new RepeatedListShim(repeatedWriter), elementSchema);
    return listObjWriter;
}
Also used : ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) RepeatedListWriter(org.apache.drill.exec.vector.accessor.writer.RepeatedListWriter) ObjectWriter(org.apache.drill.exec.vector.accessor.ObjectWriter)

Aggregations

RepeatedListWriter (org.apache.drill.exec.vector.accessor.writer.RepeatedListWriter)4 ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader)2 RowSetLoader (org.apache.drill.exec.physical.resultSet.RowSetLoader)2 ColumnMetadata (org.apache.drill.exec.record.metadata.ColumnMetadata)2 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)2 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)2 ObjectWriter (org.apache.drill.exec.vector.accessor.ObjectWriter)2 SubOperatorTest (org.apache.drill.test.SubOperatorTest)2 Test (org.junit.Test)2 RepeatedListColumnState (org.apache.drill.exec.physical.resultSet.impl.RepeatedListState.RepeatedListColumnState)1 RepeatedListVectorState (org.apache.drill.exec.physical.resultSet.impl.RepeatedListState.RepeatedListVectorState)1 RowSet (org.apache.drill.exec.physical.rowSet.RowSet)1 SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)1 PrimitiveColumnMetadata (org.apache.drill.exec.record.metadata.PrimitiveColumnMetadata)1 ArrayWriter (org.apache.drill.exec.vector.accessor.ArrayWriter)1 ScalarWriter (org.apache.drill.exec.vector.accessor.ScalarWriter)1 AbstractObjectWriter (org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter)1 RepeatedListVector (org.apache.drill.exec.vector.complex.RepeatedListVector)1