Example 26 with ScanBatch

use of org.apache.drill.exec.physical.impl.ScanBatch in project drill by axbaretto.

the class TestOutputBatchSize method getExpectedSize.

/**
 *  Figures out the total size of the batches produced for the given JSON input batches.
 */
private long getExpectedSize(List<String> expectedJsonBatches) throws ExecutionSetupException {
    // Create a dummy scanBatch to figure out the size.
    RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(), fragContext, getReaderListForJsonBatches(expectedJsonBatches, fragContext));
    Iterable<VectorAccessible> batches = new BatchIterator(scanBatch);
    long totalSize = 0;
    for (VectorAccessible batch : batches) {
        RecordBatchSizer sizer = new RecordBatchSizer(batch);
        totalSize += sizer.netSize();
    }
    return totalSize;
}
Also used: RecordBatchSizer (org.apache.drill.exec.record.RecordBatchSizer), VectorAccessible (org.apache.drill.exec.record.VectorAccessible), RecordBatch (org.apache.drill.exec.record.RecordBatch), ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch)
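
If per-column accounting is wanted instead of a single total, the same machinery can be reused. The following is a minimal sketch, not taken from the project: it assumes the same test fixtures used above (MockPhysicalOperator, fragContext, getReaderListForJsonBatches, BatchIterator), assumes sizer.columns() behaves as the Map it is used as in the next example, and needs java.util.Map and java.util.HashMap imports; the helper name getExpectedColumnSizes is hypothetical.

private Map<String, Long> getExpectedColumnSizes(List<String> expectedJsonBatches) throws ExecutionSetupException {
    // Hypothetical helper: same dummy scan as getExpectedSize(), but net size is accumulated per column.
    RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(), fragContext, getReaderListForJsonBatches(expectedJsonBatches, fragContext));
    Map<String, Long> totals = new HashMap<>();
    for (VectorAccessible batch : new BatchIterator(scanBatch)) {
        RecordBatchSizer sizer = new RecordBatchSizer(batch);
        for (Map.Entry<String, RecordBatchSizer.ColumnSize> entry : sizer.columns().entrySet()) {
            // getTotalNetSize() is the per-column counterpart of the batch-level netSize() used above.
            totals.merge(entry.getKey(), (long) entry.getValue().getTotalNetSize(), Long::sum);
        }
    }
    return totals;
}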

Example 27 with ScanBatch

use of org.apache.drill.exec.physical.impl.ScanBatch in project drill by axbaretto.

the class TestOutputBatchSize method testSizerRepeatedList.

@Test
public void testSizerRepeatedList() throws Exception {
    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchString = new StringBuilder();
    StringBuilder newString = new StringBuilder();
    newString.append("[ [1,2,3,4], [5,6,7,8] ]");
    numRows = 9;
    batchString.append("[");
    for (int i = 0; i < numRows; i++) {
        batchString.append("{\"c\" : " + newString);
        batchString.append("},");
    }
    batchString.append("{\"c\" : " + newString);
    batchString.append("}");
    batchString.append("]");
    inputJsonBatches.add(batchString.toString());
    // Create a dummy scanBatch to figure out the size.
    RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(), fragContext, getReaderListForJsonBatches(inputJsonBatches, fragContext));
    VectorAccessible va = new BatchIterator(scanBatch).iterator().next();
    RecordBatchSizer sizer = new RecordBatchSizer(va);
    assertEquals(1, sizer.columns().size());
    RecordBatchSizer.ColumnSize column = sizer.columns().get("c");
    assertNotNull(column);
    /**
     * stdDataSize:8*10*10, stdNetSize:8*10*10 + 4*10 + 4*10 + 4,
     * dataSizePerEntry:8*8, netSizePerEntry:8*8 + 4*2 + 4,
     * totalDataSize:8*8*10, totalNetSize:netSizePerEntry*10, valueCount:10,
     * elementCount:10, estElementCountPerArray:1, isVariableWidth:false
     */
    assertEquals(800, column.getStdDataSizePerEntry());
    assertEquals(884, column.getStdNetSizePerEntry());
    assertEquals(64, column.getDataSizePerEntry());
    assertEquals(76, column.getNetSizePerEntry());
    assertEquals(640, column.getTotalDataSize());
    assertEquals(760, column.getTotalNetSize());
    assertEquals(10, column.getValueCount());
    assertEquals(20, column.getElementCount());
    assertEquals(2, column.getCardinality(), 0.01);
    assertEquals(false, column.isVariableWidth());
    final int testRowCount = 1000;
    final int testRowCountPowerTwo = 2048;
    for (VectorWrapper<?> vw : va) {
        ValueVector v = vw.getValueVector();
        v.clear();
        RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
        // Allocates to nearest power of two
        colSize.allocateVector(v, testRowCount);
        // offset vector of delegate vector i.e. outer array should have row count number of values.
        UInt4Vector offsetVector = ((RepeatedListVector) v).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
        // Get inner vector of delegate vector.
        ValueVector vector = ((RepeatedValueVector) v).getDataVector();
        // Data vector of inner vector should
        // have 2 (outer array cardinality) * 4 (inner array cardinality) * row count number of values.
        ValueVector dataVector = ((RepeatedValueVector) vector).getDataVector();
        assertEquals(Integer.highestOneBit((testRowCount * 8) << 1), dataVector.getValueCapacity());
        // offset vector of inner vector should have
        // 2 (outer array cardinality) * row count number of values.
        offsetVector = ((RepeatedValueVector) vector).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount * 2) << 1), offsetVector.getValueCapacity());
        v.clear();
        // Allocates the same as value passed since it is already power of two.
        // -1 is done for adjustment needed for offset vector.
        colSize.allocateVector(v, testRowCountPowerTwo - 1);
        // offset vector of delegate vector i.e. outer array should have row count number of values.
        offsetVector = ((RepeatedListVector) v).getOffsetVector();
        assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
        // Get inner vector of delegate vector.
        vector = ((RepeatedValueVector) v).getDataVector();
        // Data vector of inner vector should
        // have 2 (outer array cardinality) * 4 (inner array cardinality) * row count number of values.
        dataVector = ((RepeatedValueVector) vector).getDataVector();
        assertEquals(testRowCountPowerTwo * 8, dataVector.getValueCapacity());
        // offset vector of inner vector should have
        // 2 (outer array cardinality) * row count number of values.
        offsetVector = ((RepeatedValueVector) vector).getOffsetVector();
        assertEquals(testRowCountPowerTwo * 2, offsetVector.getValueCapacity());
        v.clear();
        // MAX ROW COUNT
        colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
        // offset vector of delegate vector i.e. outer array should have row count number of values.
        offsetVector = ((RepeatedListVector) v).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
        // Get inner vector of delegate vector.
        vector = ((RepeatedValueVector) v).getDataVector();
        // Data vector of inner vector should
        // have 2 (outer array cardinality) * 4 (inner array cardinality) * row count number of values.
        dataVector = ((RepeatedValueVector) vector).getDataVector();
        assertEquals(ValueVector.MAX_ROW_COUNT * 8, dataVector.getValueCapacity());
        // offset vector of inner vector should have
        // 2 (outer array cardinality) * row count number of values.
        offsetVector = ((RepeatedValueVector) vector).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsetVector.getValueCapacity());
        v.clear();
        // MIN ROW COUNT
        colSize.allocateVector(v, 0);
        // offset vector of delegate vector i.e. outer array should have 1 value.
        offsetVector = ((RepeatedListVector) v).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
        // Get inner vector of delegate vector.
        vector = ((RepeatedValueVector) v).getDataVector();
        // Data vector of inner vector should have 1 value
        dataVector = ((RepeatedValueVector) vector).getDataVector();
        assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity());
        // offset vector of inner vector should have
        // 2 (outer array cardinality) * 1.
        offsetVector = ((RepeatedValueVector) vector).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT * 2, offsetVector.getValueCapacity());
        v.clear();
    }
}
Also used: VectorAccessible (org.apache.drill.exec.record.VectorAccessible), RecordBatch (org.apache.drill.exec.record.RecordBatch), RecordBatchSizer (org.apache.drill.exec.record.RecordBatchSizer), ValueVector (org.apache.drill.exec.vector.ValueVector), UInt4Vector (org.apache.drill.exec.vector.UInt4Vector), RepeatedValueVector (org.apache.drill.exec.vector.complex.RepeatedValueVector), RepeatedListVector (org.apache.drill.exec.vector.complex.RepeatedListVector), ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch), Test (org.junit.Test)
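
For reference, the asserted sizer values in this example follow from the accounting sketched in the comment above, assuming 8-byte BIGINT elements and 4-byte offset-vector entries: the standard sizes use a cardinality of 10 at both array levels, so stdDataSizePerEntry = 8*10*10 = 800 and stdNetSizePerEntry = 800 + 4*10 + 4*10 + 4 = 884; each actual row holds two inner arrays of four values, so dataSizePerEntry = 8*8 = 64 and netSizePerEntry = 64 + 4*2 + 4 = 76; across the ten rows in the batch this gives totalDataSize = 640 and totalNetSize = 760, with valueCount = 10, elementCount = 20, and a cardinality of 20/10 = 2.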

Example 28 with ScanBatch

use of org.apache.drill.exec.physical.impl.ScanBatch in project drill by axbaretto.

the class MongoScanBatchCreator method getBatch.

@Override
public ScanBatch getBatch(ExecutorFragmentContext context, MongoSubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
    Preconditions.checkArgument(children.isEmpty());
    List<RecordReader> readers = new LinkedList<>();
    List<SchemaPath> columns = null;
    for (MongoSubScan.MongoSubScanSpec scanSpec : subScan.getChunkScanSpecList()) {
        try {
            if ((columns = subScan.getColumns()) == null) {
                columns = GroupScan.ALL_COLUMNS;
            }
            readers.add(new MongoRecordReader(scanSpec, columns, context, subScan.getMongoStoragePlugin()));
        } catch (Exception e) {
            logger.error("MongoRecordReader creation failed for subScan:  " + subScan + ".");
            logger.error(e.getMessage(), e);
            throw new ExecutionSetupException(e);
        }
    }
    logger.info("Number of record readers initialized : " + readers.size());
    return new ScanBatch(subScan, context, readers);
}
Also used: ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException), SchemaPath (org.apache.drill.common.expression.SchemaPath), RecordReader (org.apache.drill.exec.store.RecordReader), ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch), LinkedList (java.util.LinkedList)

Example 29 with ScanBatch

use of org.apache.drill.exec.physical.impl.ScanBatch in project drill by axbaretto.

the class KuduScanBatchCreator method getBatch.

@Override
public ScanBatch getBatch(ExecutorFragmentContext context, KuduSubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
    Preconditions.checkArgument(children.isEmpty());
    List<RecordReader> readers = new LinkedList<>();
    List<SchemaPath> columns = null;
    for (KuduSubScan.KuduSubScanSpec scanSpec : subScan.getTabletScanSpecList()) {
        try {
            if ((columns = subScan.getColumns()) == null) {
                columns = GroupScan.ALL_COLUMNS;
            }
            readers.add(new KuduRecordReader(subScan.getStorageEngine().getClient(), scanSpec, columns));
        } catch (Exception e1) {
            throw new ExecutionSetupException(e1);
        }
    }
    return new ScanBatch(subScan, context, readers);
}
Also used: ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException), SchemaPath (org.apache.drill.common.expression.SchemaPath), RecordReader (org.apache.drill.exec.store.RecordReader), ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch), LinkedList (java.util.LinkedList)

Example 30 with ScanBatch

use of org.apache.drill.exec.physical.impl.ScanBatch in project drill by axbaretto.

the class OpenTSDBBatchCreator method getBatch.

@Override
public CloseableRecordBatch getBatch(ExecutorFragmentContext context, OpenTSDBSubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
    List<RecordReader> readers = new LinkedList<>();
    List<SchemaPath> columns;
    for (OpenTSDBSubScan.OpenTSDBSubScanSpec scanSpec : subScan.getTabletScanSpecList()) {
        try {
            if ((columns = subScan.getColumns()) == null) {
                columns = GroupScan.ALL_COLUMNS;
            }
            readers.add(new OpenTSDBRecordReader(subScan.getStorageEngine().getClient(), scanSpec, columns));
        } catch (Exception e) {
            throw new ExecutionSetupException(e);
        }
    }
    return new ScanBatch(subScan, context, readers);
}
Also used: ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException), SchemaPath (org.apache.drill.common.expression.SchemaPath), RecordReader (org.apache.drill.exec.store.RecordReader), ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch), LinkedList (java.util.LinkedList)
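
Examples 28 through 30 follow the same BatchCreator shape: assert that the scan leaf receives no child batches, fall back to GroupScan.ALL_COLUMNS when the sub-scan carries no explicit projection, build one RecordReader per sub-scan spec, wrap any construction failure in an ExecutionSetupException, and hand all readers to a single ScanBatch. Below is a minimal sketch of that shared pattern; MySubScan, MyScanSpec, MyRecordReader, and getScanSpecList() are hypothetical stand-ins for the plugin-specific classes, while the Preconditions, GroupScan, ExecutionSetupException, and ScanBatch calls mirror the examples above.

@Override
public ScanBatch getBatch(ExecutorFragmentContext context, MySubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
    // A scan is a leaf operator, so no upstream batches are expected.
    Preconditions.checkArgument(children.isEmpty());
    List<SchemaPath> columns = subScan.getColumns();
    if (columns == null) {
        // No explicit projection: read every column.
        columns = GroupScan.ALL_COLUMNS;
    }
    List<RecordReader> readers = new LinkedList<>();
    for (MySubScan.MyScanSpec scanSpec : subScan.getScanSpecList()) {
        try {
            // Hypothetical plugin-specific reader; real plugins pass whatever state their reader needs.
            readers.add(new MyRecordReader(scanSpec, columns, context));
        } catch (Exception e) {
            // Surface reader-construction failures as setup errors, as the examples above do.
            throw new ExecutionSetupException(e);
        }
    }
    // One ScanBatch drives all readers for this fragment.
    return new ScanBatch(subScan, context, readers);
}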

Aggregations

ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch): 40
RecordReader (org.apache.drill.exec.store.RecordReader): 31
ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException): 26
LinkedList (java.util.LinkedList): 16
SchemaPath (org.apache.drill.common.expression.SchemaPath): 15
IOException (java.io.IOException): 8
Map (java.util.Map): 8
OperatorContext (org.apache.drill.exec.ops.OperatorContext): 7
RecordBatch (org.apache.drill.exec.record.RecordBatch): 7
RecordBatchSizer (org.apache.drill.exec.record.RecordBatchSizer): 6
VectorAccessible (org.apache.drill.exec.record.VectorAccessible): 6
DrillFileSystem (org.apache.drill.exec.store.dfs.DrillFileSystem): 6
ValueVector (org.apache.drill.exec.vector.ValueVector): 5
Path (org.apache.hadoop.fs.Path): 5
ParquetMetadata (org.apache.parquet.hadoop.metadata.ParquetMetadata): 5
ColumnExplorer (org.apache.drill.exec.store.ColumnExplorer): 4
UInt4Vector (org.apache.drill.exec.vector.UInt4Vector): 4
RepeatedListVector (org.apache.drill.exec.vector.complex.RepeatedListVector): 4
RepeatedValueVector (org.apache.drill.exec.vector.complex.RepeatedValueVector): 4
Test (org.junit.Test): 4