Search in sources :

Example 11 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.

the class TestOutputBatchSize method testSizerRepeatedRepeatedList.

/**
 * Checks RecordBatchSizer statistics and vector allocation for a single column "c"
 * whose value is a repeated list of repeated lists of integer arrays.
 *
 * The input is one JSON batch of 10 identical rows; every row holds
 * 2 (outer lists) * 2 (inner lists) * 4 (integers) = 16 values. After verifying the
 * reported sizes, the test re-allocates the column vector for several row counts
 * (non power of two, power of two, ValueVector.MAX_ROW_COUNT and zero) and asserts the
 * value capacity of the offset and data vectors at each nesting level.
 *
 * @throws Exception if building the scan batch or reading the JSON input fails
 */
@Test
public void testSizerRepeatedRepeatedList() throws Exception {
    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchString = new StringBuilder();
    StringBuilder newString = new StringBuilder();
    newString.append("[ [[1,2,3,4], [5,6,7,8]], [[1,2,3,4], [5,6,7,8]] ]");
    numRows = 9;
    batchString.append("[");
    // numRows comma-terminated rows here plus one final row below: 10 rows in total.
    for (int i = 0; i < numRows; i++) {
        batchString.append("{\"c\" : ").append(newString);
        batchString.append("},");
    }
    batchString.append("{\"c\" : ").append(newString);
    batchString.append("}");
    batchString.append("]");
    inputJsonBatches.add(batchString.toString());
    // Create a dummy scanBatch to figure out the size.
    RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(), fragContext, getReaderListForJsonBatches(inputJsonBatches, fragContext));
    VectorAccessible va = new BatchIterator(scanBatch).iterator().next();
    RecordBatchSizer sizer = new RecordBatchSizer(va);
    assertEquals(1, sizer.columns().size());
    RecordBatchSizer.ColumnSize column = sizer.columns().get("c");
    assertNotNull(column);
    // Expected sizes for column "c" (these match the assertions that follow):
    // stdDataSize: 8*5*5*5 = 1000, stdNetSize: 8*5*5*5 + 8*5*5 + 8*5 + 4 = 1244,
    // dataSizePerEntry: 16*8 = 128, netSizePerEntry: 156,
    // totalDataSize: 128*10, totalNetSize: netSizePerEntry*10, valueCount: 10,
    // elementCount: 20, cardinality: 2, isVariableWidth: false
    assertEquals(1000, column.getStdDataSizePerEntry());
    assertEquals(1244, column.getStdNetSizePerEntry());
    assertEquals(128, column.getDataSizePerEntry());
    assertEquals(156, column.getNetSizePerEntry());
    assertEquals(1280, column.getTotalDataSize());
    assertEquals(1560, column.getTotalNetSize());
    assertEquals(10, column.getValueCount());
    assertEquals(20, column.getElementCount());
    assertEquals(2, column.getCardinality(), 0.01);
    assertEquals(false, column.isVariableWidth());
    final int testRowCount = 1000;
    final int testRowCountPowerTwo = 2048;
    for (VectorWrapper<?> vw : va) {
        ValueVector v = vw.getValueVector();
        v.clear();
        RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
        // Allocates to nearest power of two
        colSize.allocateVector(v, testRowCount);
        // offset vector of delegate vector i.e. outer array should have row count number of values.
        UInt4Vector offsetVector = ((RepeatedListVector) v).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
        // Get data vector of delegate vector. This is repeated list again
        ValueVector dataVector = ((RepeatedListVector) v).getDataVector();
        // offset vector of delegate vector of the inner repeated list
        // This should have row count * 2 number of values.
        offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount * 2) << 1), offsetVector.getValueCapacity());
        // The inner repeated list itself should report row count * 2 number of values - 1
        // (for offset vector adjustment).
        ValueVector innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
        assertEquals((Integer.highestOneBit((testRowCount * 2) << 1) - 1), dataVector.getValueCapacity());
        // offset vector of inner vector should have
        // 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
        offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount * 4) << 1), offsetVector.getValueCapacity());
        // Data vector of inner vector should have 16 values per row
        // (2 outer lists * 2 inner lists * 4 integers in the input JSON).
        dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
        assertEquals(Integer.highestOneBit(testRowCount << 1) * 16, dataVector.getValueCapacity());
        v.clear();
        // Allocates the same as value passed since it is already power of two.
        // -1 is done for adjustment needed for offset vector.
        colSize.allocateVector(v, testRowCountPowerTwo - 1);
        // offset vector of delegate vector i.e. outer array should have row count number of values.
        offsetVector = ((RepeatedListVector) v).getOffsetVector();
        assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
        // Get data vector of delegate vector. This is repeated list again
        dataVector = ((RepeatedListVector) v).getDataVector();
        // offset vector of delegate vector of the inner repeated list
        // This should have row count * 2 number of values.
        offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
        assertEquals(testRowCountPowerTwo * 2, offsetVector.getValueCapacity());
        // The inner repeated list itself should report row count * 2 number of values - 1
        // (for offset vector adjustment).
        innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
        assertEquals(testRowCountPowerTwo * 2 - 1, dataVector.getValueCapacity());
        // offset vector of inner vector should have
        // 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
        offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
        assertEquals(testRowCountPowerTwo * 4, offsetVector.getValueCapacity());
        // Data vector of inner vector should have 16 values per row.
        dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
        assertEquals(testRowCountPowerTwo * 16, dataVector.getValueCapacity());
        v.clear();
        // MAX ROW COUNT
        colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
        // offset vector of delegate vector i.e. outer array should have row count number of values.
        offsetVector = ((RepeatedListVector) v).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
        // Get data vector of delegate vector. This is repeated list again
        dataVector = ((RepeatedListVector) v).getDataVector();
        // offset vector of delegate vector of the inner repeated list
        // This should have row count * 2 number of values.
        offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsetVector.getValueCapacity());
        // The inner repeated list itself should report row count * 2 number of values - 1
        // (for offset vector adjustment).
        innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
        assertEquals(ValueVector.MAX_ROW_COUNT * 2 - 1, dataVector.getValueCapacity());
        // offset vector of inner vector should have
        // 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
        offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT * 4, offsetVector.getValueCapacity());
        // Data vector of inner vector should have 16 values per row.
        dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
        assertEquals(ValueVector.MAX_ROW_COUNT * 16, dataVector.getValueCapacity());
        v.clear();
        // MIN ROW COUNT
        colSize.allocateVector(v, 0);
        // offset vector of delegate vector i.e. outer array should have 1 value.
        offsetVector = ((RepeatedListVector) v).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
        // Get data vector of delegate vector. This is repeated list again
        dataVector = ((RepeatedListVector) v).getDataVector();
        // offset vector of delegate vector of the inner repeated list
        offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
        // Re-derive the innermost repeated vector from this allocation rather than reusing
        // the reference captured before the preceding clear()/allocateVector() calls.
        innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
        // offset vector of inner vector should have
        // 2 (outer array cardinality) * 1.
        offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT * 2, offsetVector.getValueCapacity());
        // Data vector of inner vector should have 1 value.
        dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
        assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity());
        v.clear();
    }
}
Also used : VectorAccessible(org.apache.drill.exec.record.VectorAccessible) RecordBatch(org.apache.drill.exec.record.RecordBatch) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) ValueVector(org.apache.drill.exec.vector.ValueVector) RecordBatchSizer(org.apache.drill.exec.record.RecordBatchSizer) RepeatedListVector(org.apache.drill.exec.vector.complex.RepeatedListVector) ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch) Test(org.junit.Test)

Example 12 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.

the class DumpCat method getBatchMetaInfo.

/**
 * Collects summary figures for one deserialized batch.
 *
 * @param vcSerializable holder of the batch and, when present, its two-byte selection vector
 * @return the record count, the selected record count (the selection vector's count when the
 *         schema's selection vector mode is TWO_BYTE, otherwise the record count) and the sum
 *         of the buffer sizes of all value vectors in the batch
 */
private BatchMetaInfo getBatchMetaInfo(VectorAccessibleSerializable vcSerializable) {
    final VectorAccessible batch = vcSerializable.get();
    final int recordCount = batch.getRecordCount();

    // Only a two-byte selection vector narrows the selected count.
    final boolean usesSv2 = batch.getSchema().getSelectionVectorMode() == SelectionVectorMode.TWO_BYTE;
    final int selectedCount = usesSv2 ? vcSerializable.getSv2().getCount() : recordCount;

    int bufferBytes = 0;
    for (final VectorWrapper<?> wrapper : batch) {
        bufferBytes = bufferBytes + wrapper.getValueVector().getBufferSize();
    }

    return new BatchMetaInfo(recordCount, selectedCount, bufferBytes);
}
Also used : VectorAccessible(org.apache.drill.exec.record.VectorAccessible)

Example 13 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.

the class TestTraceOutputDump method testFilter.

/**
 * Executes the plan in /trace/simple_trace.json, then reads back the file the trace output
 * dumped and checks that it holds exactly one record, no two-byte selection vector, and the
 * value Integer.MIN_VALUE in the first column.
 *
 * @throws Throwable the fragment's failure cause if execution failed, or any error raised
 *                   while reading the trace file
 */
@Test
public void testFilter() throws Throwable {
    final DrillbitContext bitContext = mockDrillbitContext();
    final UserClientConnection connection = Mockito.mock(UserClientConnection.class);
    final PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(c);
    final PhysicalPlan plan = reader.readPhysicalPlan(Files.asCharSource(DrillFileUtils.getResourceAsFile("/trace/simple_trace.json"), Charsets.UTF_8).read());
    final FunctionImplementationRegistry registry = new FunctionImplementationRegistry(c);
    final FragmentContextImpl context = new FragmentContextImpl(bitContext, PlanFragment.getDefaultInstance(), connection, registry);
    final SimpleRootExec exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));
    while (exec.next()) {
        // Drain every batch; only the dumped trace file is inspected below.
    }
    exec.close();
    // Surface the real failure instead of a bare assertion error.
    if (context.getExecutorState().getFailureCause() != null) {
        throw context.getExecutorState().getFailureCause();
    }
    assertTrue(!context.getExecutorState().isFailed());
    final FragmentHandle handle = context.getHandle();
    /* Form the file name to which the trace output will dump the record batches */
    final String qid = QueryIdHelper.getQueryId(handle.getQueryId());
    final int majorFragmentId = handle.getMajorFragmentId();
    final int minorFragmentId = handle.getMinorFragmentId();
    final String logLocation = c.getString(ExecConstants.TRACE_DUMP_DIRECTORY);
    final String filename = String.format("%s//%s_%d_%d_mock-scan", logLocation, qid, majorFragmentId, minorFragmentId);
    final Configuration conf = new Configuration();
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, c.getString(ExecConstants.TRACE_DUMP_FILESYSTEM));
    final FileSystem fs = FileSystem.get(conf);
    final Path path = new Path(filename);
    assertTrue("Trace file does not exist", fs.exists(path));
    // try-with-resources closes the input stream even when reading or an assertion fails.
    try (FSDataInputStream in = fs.open(path)) {
        final VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(context.getAllocator());
        wrap.readFromStream(in);
        final VectorAccessible container = wrap.get();
        /* Assert there are no selection vectors */
        assertNull(wrap.getSv2());
        /* Assert there is only one record */
        assertEquals(1, container.getRecordCount());
        /* Read the Integer value and ASSERT its Integer.MIN_VALUE */
        final int value = (int) container.iterator().next().getValueVector().getAccessor().getObject(0);
        // JUnit's argument order is (expected, actual).
        assertEquals(Integer.MIN_VALUE, value);
    }
}
Also used : DrillbitContext(org.apache.drill.exec.server.DrillbitContext) Path(org.apache.hadoop.fs.Path) VectorAccessibleSerializable(org.apache.drill.exec.cache.VectorAccessibleSerializable) PhysicalPlan(org.apache.drill.exec.physical.PhysicalPlan) Configuration(org.apache.hadoop.conf.Configuration) VectorAccessible(org.apache.drill.exec.record.VectorAccessible) PhysicalPlanReader(org.apache.drill.exec.planner.PhysicalPlanReader) FragmentContextImpl(org.apache.drill.exec.ops.FragmentContextImpl) FragmentRoot(org.apache.drill.exec.physical.base.FragmentRoot) FragmentHandle(org.apache.drill.exec.proto.ExecProtos.FragmentHandle) SimpleRootExec(org.apache.drill.exec.physical.impl.SimpleRootExec) FileSystem(org.apache.hadoop.fs.FileSystem) UserClientConnection(org.apache.drill.exec.rpc.UserClientConnection) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FunctionImplementationRegistry(org.apache.drill.exec.expr.fn.FunctionImplementationRegistry) ExecTest(org.apache.drill.exec.ExecTest) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test)

Example 14 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.

the class TestBatchValidator method testRepeatedBadArrayOffset.

/**
 * Builds a three-row batch with one repeated VARCHAR column "a", overwrites entry 3 of the
 * column's offset vector with 1, and expects the check to report BAD_OFFSETS.
 */
@Test
public void testRepeatedBadArrayOffset() {
    TupleMetadata schema = new SchemaBuilder().add("a", MinorType.VARCHAR, DataMode.REPEATED).buildSchema();
    SingleRowSet batch = fixture.rowSetBuilder(schema).addRow((Object) strArray()).addRow((Object) strArray("fred", "barney", "wilma")).addRow((Object) strArray("dino")).build();
    try {
        VectorAccessible va = batch.vectorAccessible();
        ValueVector v = va.iterator().next().getValueVector();
        RepeatedVarCharVector vc = (RepeatedVarCharVector) v;
        UInt4Vector ov = vc.getOffsetVector();
        // Deliberately corrupt the offset vector so the check has something to find.
        ov.getMutator().set(3, 1);
        checkForError(batch, BAD_OFFSETS);
    } finally {
        // Clear the batch even when the check above fails.
        batch.clear();
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) VectorAccessible(org.apache.drill.exec.record.VectorAccessible) RepeatedVarCharVector(org.apache.drill.exec.vector.RepeatedVarCharVector) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 15 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.

the class TestBatchValidator method zapOffset.

/**
 * Simulates corruption by overwriting one entry in the offset vector of the batch's first
 * column, which is cast to VarCharVector. Test-only; do not do this in real code.
 *
 * @param batch      row set whose first column is modified
 * @param index      position in the offset vector to overwrite
 * @param bogusValue value written at that position
 */
public void zapOffset(SingleRowSet batch, int index, int bogusValue) {
    final VarCharVector firstColumn = (VarCharVector) batch.vectorAccessible().iterator().next().getValueVector();
    firstColumn.getOffsetVector().getMutator().set(index, bogusValue);
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) VectorAccessible(org.apache.drill.exec.record.VectorAccessible) RepeatedVarCharVector(org.apache.drill.exec.vector.RepeatedVarCharVector) VarCharVector(org.apache.drill.exec.vector.VarCharVector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector)

Aggregations

VectorAccessible (org.apache.drill.exec.record.VectorAccessible): 32; ValueVector (org.apache.drill.exec.vector.ValueVector): 17; Test (org.junit.Test): 14; UInt4Vector (org.apache.drill.exec.vector.UInt4Vector): 12; SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 8; RecordBatchSizer (org.apache.drill.exec.record.RecordBatchSizer): 8; RepeatedVarCharVector (org.apache.drill.exec.vector.RepeatedVarCharVector): 8; BatchSchema (org.apache.drill.exec.record.BatchSchema): 7; MaterializedField (org.apache.drill.exec.record.MaterializedField): 7; ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch): 6; RecordBatch (org.apache.drill.exec.record.RecordBatch): 6; VarCharVector (org.apache.drill.exec.vector.VarCharVector): 6; ExecTest (org.apache.drill.exec.ExecTest): 4; FragmentContextImpl (org.apache.drill.exec.ops.FragmentContextImpl): 4; SelectionVector2 (org.apache.drill.exec.record.selection.SelectionVector2): 4; SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4): 4; Text (org.apache.drill.exec.util.Text): 4; RepeatedListVector (org.apache.drill.exec.vector.complex.RepeatedListVector): 4; RepeatedValueVector (org.apache.drill.exec.vector.complex.RepeatedValueVector): 4; List (java.util.List): 3