use of org.apache.drill.exec.physical.impl.ScanBatch in project drill by axbaretto.
the class TestOutputBatchSize method getExpectedSize.
/**
* Figures out what will be total size of the batches for a given Json input batch.
*/
private long getExpectedSize(List<String> expectedJsonBatches) throws ExecutionSetupException {
// Create a dummy scanBatch to figure out the size.
RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(), fragContext, getReaderListForJsonBatches(expectedJsonBatches, fragContext));
Iterable<VectorAccessible> batches = new BatchIterator(scanBatch);
long totalSize = 0;
for (VectorAccessible batch : batches) {
RecordBatchSizer sizer = new RecordBatchSizer(batch);
totalSize += sizer.netSize();
}
return totalSize;
}
use of org.apache.drill.exec.physical.impl.ScanBatch in project drill by axbaretto.
the class TestOutputBatchSize method testSizerRepeatedList.
@Test
public void testSizerRepeatedList() throws Exception {
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder batchString = new StringBuilder();
StringBuilder newString = new StringBuilder();
newString.append("[ [1,2,3,4], [5,6,7,8] ]");
numRows = 9;
batchString.append("[");
for (int i = 0; i < numRows; i++) {
batchString.append("{\"c\" : " + newString);
batchString.append("},");
}
batchString.append("{\"c\" : " + newString);
batchString.append("}");
batchString.append("]");
inputJsonBatches.add(batchString.toString());
// Create a dummy scanBatch to figure out the size.
RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(), fragContext, getReaderListForJsonBatches(inputJsonBatches, fragContext));
VectorAccessible va = new BatchIterator(scanBatch).iterator().next();
RecordBatchSizer sizer = new RecordBatchSizer(va);
assertEquals(1, sizer.columns().size());
RecordBatchSizer.ColumnSize column = sizer.columns().get("c");
assertNotNull(column);
/**
* stdDataSize:8*10*10, stdNetSize:8*10*10 + 4*10 + 4*10 + 4,
* dataSizePerEntry:8*8, netSizePerEntry:8*8 + 4*2 + 4,
* totalDataSize:8*8*10, totalNetSize:netSizePerEntry*10, valueCount:10,
* elementCount:10, estElementCountPerArray:1, isVariableWidth:false
*/
assertEquals(800, column.getStdDataSizePerEntry());
assertEquals(884, column.getStdNetSizePerEntry());
assertEquals(64, column.getDataSizePerEntry());
assertEquals(76, column.getNetSizePerEntry());
assertEquals(640, column.getTotalDataSize());
assertEquals(760, column.getTotalNetSize());
assertEquals(10, column.getValueCount());
assertEquals(20, column.getElementCount());
assertEquals(2, column.getCardinality(), 0.01);
assertEquals(false, column.isVariableWidth());
final int testRowCount = 1000;
final int testRowCountPowerTwo = 2048;
for (VectorWrapper<?> vw : va) {
ValueVector v = vw.getValueVector();
v.clear();
RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
// Allocates to nearest power of two
colSize.allocateVector(v, testRowCount);
// offset vector of delegate vector i.e. outer array should have row count number of values.
UInt4Vector offsetVector = ((RepeatedListVector) v).getOffsetVector();
assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
// Get inner vector of delegate vector.
ValueVector vector = ((RepeatedValueVector) v).getDataVector();
// Data vector of inner vector should
// have 2 (outer array cardinality) * 4 (inner array cardinality) * row count number of values.
ValueVector dataVector = ((RepeatedValueVector) vector).getDataVector();
assertEquals(Integer.highestOneBit((testRowCount * 8) << 1), dataVector.getValueCapacity());
// offset vector of inner vector should have
// 2 (outer array cardinality) * row count number of values.
offsetVector = ((RepeatedValueVector) vector).getOffsetVector();
assertEquals((Integer.highestOneBit(testRowCount * 2) << 1), offsetVector.getValueCapacity());
v.clear();
// Allocates the same as value passed since it is already power of two.
// -1 is done for adjustment needed for offset vector.
colSize.allocateVector(v, testRowCountPowerTwo - 1);
// offset vector of delegate vector i.e. outer array should have row count number of values.
offsetVector = ((RepeatedListVector) v).getOffsetVector();
assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
// Get inner vector of delegate vector.
vector = ((RepeatedValueVector) v).getDataVector();
// Data vector of inner vector should
// have 2 (outer array cardinality) * 4 (inner array cardinality) * row count number of values.
dataVector = ((RepeatedValueVector) vector).getDataVector();
assertEquals(testRowCountPowerTwo * 8, dataVector.getValueCapacity());
// offset vector of inner vector should have
// 2 (outer array cardinality) * row count number of values.
offsetVector = ((RepeatedValueVector) vector).getOffsetVector();
assertEquals(testRowCountPowerTwo * 2, offsetVector.getValueCapacity());
v.clear();
// MAX ROW COUNT
colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
// offset vector of delegate vector i.e. outer array should have row count number of values.
offsetVector = ((RepeatedListVector) v).getOffsetVector();
assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
// Get inner vector of delegate vector.
vector = ((RepeatedValueVector) v).getDataVector();
// Data vector of inner vector should
// have 2 (outer array cardinality) * 4 (inner array cardinality) * row count number of values.
dataVector = ((RepeatedValueVector) vector).getDataVector();
assertEquals(ValueVector.MAX_ROW_COUNT * 8, dataVector.getValueCapacity());
// offset vector of inner vector should have
// 2 (outer array cardinality) * row count number of values.
offsetVector = ((RepeatedValueVector) vector).getOffsetVector();
assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsetVector.getValueCapacity());
v.clear();
// MIN ROW COUNT
colSize.allocateVector(v, 0);
// offset vector of delegate vector i.e. outer array should have 1 value.
offsetVector = ((RepeatedListVector) v).getOffsetVector();
assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
// Get inner vector of delegate vector.
vector = ((RepeatedValueVector) v).getDataVector();
// Data vector of inner vector should have 1 value
dataVector = ((RepeatedValueVector) vector).getDataVector();
assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity());
// offset vector of inner vector should have
// 2 (outer array cardinality) * 1.
offsetVector = ((RepeatedValueVector) vector).getOffsetVector();
assertEquals(ValueVector.MIN_ROW_COUNT * 2, offsetVector.getValueCapacity());
v.clear();
}
}
use of org.apache.drill.exec.physical.impl.ScanBatch in project drill by axbaretto.
the class MongoScanBatchCreator method getBatch.
@Override
public ScanBatch getBatch(ExecutorFragmentContext context, MongoSubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
Preconditions.checkArgument(children.isEmpty());
List<RecordReader> readers = new LinkedList<>();
List<SchemaPath> columns = null;
for (MongoSubScan.MongoSubScanSpec scanSpec : subScan.getChunkScanSpecList()) {
try {
if ((columns = subScan.getColumns()) == null) {
columns = GroupScan.ALL_COLUMNS;
}
readers.add(new MongoRecordReader(scanSpec, columns, context, subScan.getMongoStoragePlugin()));
} catch (Exception e) {
logger.error("MongoRecordReader creation failed for subScan: " + subScan + ".");
logger.error(e.getMessage(), e);
throw new ExecutionSetupException(e);
}
}
logger.info("Number of record readers initialized : " + readers.size());
return new ScanBatch(subScan, context, readers);
}
use of org.apache.drill.exec.physical.impl.ScanBatch in project drill by axbaretto.
the class KuduScanBatchCreator method getBatch.
@Override
public ScanBatch getBatch(ExecutorFragmentContext context, KuduSubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
Preconditions.checkArgument(children.isEmpty());
List<RecordReader> readers = new LinkedList<>();
List<SchemaPath> columns = null;
for (KuduSubScan.KuduSubScanSpec scanSpec : subScan.getTabletScanSpecList()) {
try {
if ((columns = subScan.getColumns()) == null) {
columns = GroupScan.ALL_COLUMNS;
}
readers.add(new KuduRecordReader(subScan.getStorageEngine().getClient(), scanSpec, columns));
} catch (Exception e1) {
throw new ExecutionSetupException(e1);
}
}
return new ScanBatch(subScan, context, readers);
}
use of org.apache.drill.exec.physical.impl.ScanBatch in project drill by axbaretto.
the class OpenTSDBBatchCreator method getBatch.
@Override
public CloseableRecordBatch getBatch(ExecutorFragmentContext context, OpenTSDBSubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
List<RecordReader> readers = new LinkedList<>();
List<SchemaPath> columns;
for (OpenTSDBSubScan.OpenTSDBSubScanSpec scanSpec : subScan.getTabletScanSpecList()) {
try {
if ((columns = subScan.getColumns()) == null) {
columns = GroupScan.ALL_COLUMNS;
}
readers.add(new OpenTSDBRecordReader(subScan.getStorageEngine().getClient(), scanSpec, columns));
} catch (Exception e) {
throw new ExecutionSetupException(e);
}
}
return new ScanBatch(subScan, context, readers);
}
Aggregations