use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.
the class TestOutputBatchSize method testSizerRepeatedRepeatedList.
/**
 * Checks RecordBatchSizer statistics and vector allocation for a single column "c" whose
 * value in every row is a three-level nested array of integers:
 * 2 outer entries, each holding 2 inner arrays of 4 values (16 values per row).
 *
 * The test first builds a 10-row JSON batch, runs it through a ScanBatch and asserts the
 * per-column sizes reported by the sizer. It then re-allocates the column's vector for
 * several row counts (non power of two, power of two, MAX_ROW_COUNT, zero) and asserts the
 * value capacity of the offset and data vectors at each nesting level.
 *
 * @throws Exception propagated from batch creation or reading
 */
@Test
public void testSizerRepeatedRepeatedList() throws Exception {
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder batchString = new StringBuilder();
StringBuilder newString = new StringBuilder();
// One row's value: [ [[4 ints],[4 ints]], [[4 ints],[4 ints]] ]
newString.append("[ [[1,2,3,4], [5,6,7,8]], [[1,2,3,4], [5,6,7,8]] ]");
// numRows is not declared in this method (presumably a field of the test class).
// The loop emits 9 comma-terminated rows; one more row is appended after it, for 10 rows total.
numRows = 9;
batchString.append("[");
for (int i = 0; i < numRows; i++) {
batchString.append("{\"c\" : " + newString);
batchString.append("},");
}
// Last row is written without the trailing comma to keep the JSON array well-formed.
batchString.append("{\"c\" : " + newString);
batchString.append("}");
batchString.append("]");
inputJsonBatches.add(batchString.toString());
// Create a dummy scanBatch to figure out the size.
RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(), fragContext, getReaderListForJsonBatches(inputJsonBatches, fragContext));
// Only the first batch produced by the scan is sized.
VectorAccessible va = new BatchIterator(scanBatch).iterator().next();
RecordBatchSizer sizer = new RecordBatchSizer(va);
assertEquals(1, sizer.columns().size());
RecordBatchSizer.ColumnSize column = sizer.columns().get("c");
assertNotNull(column);
/**
 * Expected values asserted below:
 * stdDataSize:8*5*5*5, stdNetSize:8*5*5*5 + 8*5*5 + 8*5 + 4,
 * dataSizePerEntry:16*8,
 * netSizePerEntry:156 (presumably 16*8 data + 4*4 + 2*4 + 4 offset bytes - TODO confirm breakdown),
 * totalDataSize:16*8*10, totalNetSize:netSizePerEntry*10, valueCount:10,
 * elementCount:20, cardinality:2, isVariableWidth:false
 */
assertEquals(1000, column.getStdDataSizePerEntry());
assertEquals(1244, column.getStdNetSizePerEntry());
assertEquals(128, column.getDataSizePerEntry());
assertEquals(156, column.getNetSizePerEntry());
assertEquals(1280, column.getTotalDataSize());
assertEquals(1560, column.getTotalNetSize());
assertEquals(10, column.getValueCount());
assertEquals(20, column.getElementCount());
assertEquals(2, column.getCardinality(), 0.01);
assertEquals(false, column.isVariableWidth());
// Row counts used for the allocation checks: one that is not a power of two and one that is.
final int testRowCount = 1000;
final int testRowCountPowerTwo = 2048;
for (VectorWrapper<?> vw : va) {
ValueVector v = vw.getValueVector();
// Release the scanned data so the vector can be re-allocated from the sizer's estimates.
v.clear();
RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
// ---- Case 1: row count that is not a power of two ----
// Allocates to nearest power of two
colSize.allocateVector(v, testRowCount);
// offset vector of delegate vector i.e. outer array should have row count number of values.
UInt4Vector offsetVector = ((RepeatedListVector) v).getOffsetVector();
assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
// Get data vector of delegate vector. This is repeated list again
ValueVector dataVector = ((RepeatedListVector) v).getDataVector();
// offset vector of delegate vector of the inner repeated list
// This should have row count * 2 number of values.
offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
assertEquals((Integer.highestOneBit(testRowCount * 2) << 1), offsetVector.getValueCapacity());
// Data vector of inner vector should have row count * 2 number of values - 1 (for offset vector adjustment).
// NOTE(review): innerDataVector is fetched here, but the assertion below inspects dataVector
// (the inner repeated list), not innerDataVector - confirm this is intended.
ValueVector innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
assertEquals((Integer.highestOneBit((testRowCount * 2) << 1) - 1), dataVector.getValueCapacity());
// offset vector of inner vector should have
// 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
assertEquals((Integer.highestOneBit(testRowCount * 4) << 1), offsetVector.getValueCapacity());
// Data vector of inner vector should
// have 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
// The asserted capacity is 16 * highestOneBit(2 * row count).
dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
assertEquals(Integer.highestOneBit(testRowCount << 1) * 16, dataVector.getValueCapacity());
v.clear();
// ---- Case 2: row count that is a power of two ----
// Allocates the same as value passed since it is already power of two.
// -1 is done for adjustment needed for offset vector.
colSize.allocateVector(v, testRowCountPowerTwo - 1);
// offset vector of delegate vector i.e. outer array should have row count number of values.
offsetVector = ((RepeatedListVector) v).getOffsetVector();
assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
// Get data vector of delegate vector. This is repeated list again
dataVector = ((RepeatedListVector) v).getDataVector();
// offset vector of delegate vector of the inner repeated list
// This should have row count * 2 number of values.
offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
assertEquals(testRowCountPowerTwo * 2, offsetVector.getValueCapacity());
// Data vector of inner vector should have row count * 2 number of values - 1 (for offset vector adjustment).
// NOTE(review): as in case 1, the assertion inspects dataVector rather than innerDataVector.
innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
assertEquals(testRowCountPowerTwo * 2 - 1, dataVector.getValueCapacity());
// offset vector of inner vector should have
// 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
assertEquals(testRowCountPowerTwo * 4, offsetVector.getValueCapacity());
// Data vector of inner vector should
// have 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
// The asserted capacity is 16 * row count.
dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
assertEquals(testRowCountPowerTwo * 16, dataVector.getValueCapacity());
v.clear();
// ---- Case 3: maximum row count ----
// MAX ROW COUNT
colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
// offset vector of delegate vector i.e. outer array should have row count number of values.
offsetVector = ((RepeatedListVector) v).getOffsetVector();
assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
// Get data vector of delegate vector. This is repeated list again
dataVector = ((RepeatedListVector) v).getDataVector();
// offset vector of delegate vector of the inner repeated list
// This should have row count * 2 number of values.
offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsetVector.getValueCapacity());
// Data vector of inner vector should have row count * 2 number of values - 1 (for offset vector adjustment).
// NOTE(review): as in case 1, the assertion inspects dataVector rather than innerDataVector.
innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
assertEquals(ValueVector.MAX_ROW_COUNT * 2 - 1, dataVector.getValueCapacity());
// offset vector of inner vector should have
// 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
assertEquals(ValueVector.MAX_ROW_COUNT * 4, offsetVector.getValueCapacity());
// Data vector of inner vector should
// have 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
// The asserted capacity is 16 * MAX_ROW_COUNT.
dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
assertEquals(ValueVector.MAX_ROW_COUNT * 16, dataVector.getValueCapacity());
v.clear();
// ---- Case 4: zero rows requested ----
// MIN ROW COUNT
colSize.allocateVector(v, 0);
// offset vector of delegate vector i.e. outer array should have 1 value.
offsetVector = ((RepeatedListVector) v).getOffsetVector();
assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
// Get data vector of delegate vector. This is repeated list again
dataVector = ((RepeatedListVector) v).getDataVector();
// offset vector of delegate vector of the inner repeated list
offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
// offset vector of inner vector should have
// 2 (outer array cardinality) * 1.
// NOTE(review): innerDataVector is not re-fetched in this section; this is the reference
// obtained during the MAX ROW COUNT case - presumably the same child vector instance, confirm.
offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
assertEquals(ValueVector.MIN_ROW_COUNT * 2, offsetVector.getValueCapacity());
// Data vector of inner vector should 1 value.
dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity());
v.clear();
}
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.
the class DumpCat method getBatchMetaInfo.
/**
 * Summarizes a deserialized batch: total row count, selected row count and the
 * combined buffer size of all value vectors.
 *
 * The selected row count equals the total row count unless the batch schema uses a
 * two-byte selection vector, in which case the selection vector's count is used.
 *
 * @param vcSerializable holder of the batch (and its optional SV2) to inspect
 * @return a BatchMetaInfo built from (rows, selectedRows, totalDataSize)
 */
private BatchMetaInfo getBatchMetaInfo(VectorAccessibleSerializable vcSerializable) {
  final VectorAccessible batch = vcSerializable.get();

  final int rowCount = batch.getRecordCount();

  // Only a TWO_BYTE selection vector narrows the selection; otherwise every row counts.
  final boolean hasSv2 = batch.getSchema().getSelectionVectorMode() == SelectionVectorMode.TWO_BYTE;
  final int selectedCount = hasSv2 ? vcSerializable.getSv2().getCount() : rowCount;

  // Accumulate the buffer size of every vector in the batch.
  int bufferBytes = 0;
  for (final VectorWrapper<?> wrapper : batch) {
    final int vectorBytes = wrapper.getValueVector().getBufferSize();
    bufferBytes = bufferBytes + vectorBytes;
  }

  return new BatchMetaInfo(rowCount, selectedCount, bufferBytes);
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.
the class TestTraceOutputDump method testFilter.
/**
 * Executes the physical plan in /trace/simple_trace.json, then opens the trace dump file
 * written for the fragment and verifies its contents: no selection vector, exactly one
 * record, and an integer value of Integer.MIN_VALUE in the first column.
 *
 * @throws Throwable the fragment's failure cause if execution failed, or any error raised
 *         while reading the plan or the trace file
 */
@Test
public void testFilter() throws Throwable {
  final DrillbitContext bitContext = mockDrillbitContext();
  final UserClientConnection connection = Mockito.mock(UserClientConnection.class);
  final PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(c);
  final PhysicalPlan plan = reader.readPhysicalPlan(Files.asCharSource(DrillFileUtils.getResourceAsFile("/trace/simple_trace.json"), Charsets.UTF_8).read());
  final FunctionImplementationRegistry registry = new FunctionImplementationRegistry(c);
  final FragmentContextImpl context = new FragmentContextImpl(bitContext, PlanFragment.getDefaultInstance(), connection, registry);
  final SimpleRootExec exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));

  while (exec.next()) {
    // Drain every batch; the batches themselves are not inspected here.
  }
  exec.close();

  // Surface the real failure instead of a bare assertion error.
  if (context.getExecutorState().getFailureCause() != null) {
    throw context.getExecutorState().getFailureCause();
  }
  assertTrue(!context.getExecutorState().isFailed());

  final FragmentHandle handle = context.getHandle();

  /* Form the file name to which the trace output will dump the record batches */
  final String qid = QueryIdHelper.getQueryId(handle.getQueryId());
  final int majorFragmentId = handle.getMajorFragmentId();
  final int minorFragmentId = handle.getMinorFragmentId();
  final String logLocation = c.getString(ExecConstants.TRACE_DUMP_DIRECTORY);
  final String filename = String.format("%s//%s_%d_%d_mock-scan", logLocation, qid, majorFragmentId, minorFragmentId);

  final Configuration conf = new Configuration();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, c.getString(ExecConstants.TRACE_DUMP_FILESYSTEM));
  final FileSystem fs = FileSystem.get(conf);
  final Path path = new Path(filename);
  assertTrue("Trace file does not exist", fs.exists(path));

  // try-with-resources guarantees the trace stream is closed even when an assertion fails.
  try (FSDataInputStream in = fs.open(path)) {
    final VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(context.getAllocator());
    wrap.readFromStream(in);
    final VectorAccessible container = wrap.get();

    /* Assert there are no selection vectors */
    assertNull(wrap.getSv2());

    /* Assert there is only one record */
    assertEquals(1, container.getRecordCount());

    /* Read the Integer value and ASSERT its Integer.MIN_VALUE */
    final int value = (int) container.iterator().next().getValueVector().getAccessor().getObject(0);
    // Expected value goes first so a failure message reads correctly.
    assertEquals(Integer.MIN_VALUE, value);
  }
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.
the class TestBatchValidator method testRepeatedBadArrayOffset.
/**
 * Builds a three-row batch with a single repeated VARCHAR column, overwrites one entry
 * of the column's array offset vector, and expects the validator to report BAD_OFFSETS.
 */
@Test
public void testRepeatedBadArrayOffset() {
  TupleMetadata schema = new SchemaBuilder().add("a", MinorType.VARCHAR, DataMode.REPEATED).buildSchema();
  SingleRowSet batch = fixture.rowSetBuilder(schema).addRow((Object) strArray()).addRow((Object) strArray("fred", "barney", "wilma")).addRow((Object) strArray("dino")).build();
  try {
    VectorAccessible va = batch.vectorAccessible();
    ValueVector v = va.iterator().next().getValueVector();
    RepeatedVarCharVector vc = (RepeatedVarCharVector) v;
    UInt4Vector ov = vc.getOffsetVector();
    // Corrupt the array offsets on purpose. Presumably the valid offsets are [0, 0, 3, 4],
    // so writing 1 at index 3 makes the last offset smaller than its predecessor - confirm.
    ov.getMutator().set(3, 1);
    checkForError(batch, BAD_OFFSETS);
  } finally {
    // Release the batch even when the check above fails, so a failure does not leak it.
    batch.clear();
  }
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.
the class TestBatchValidator method zapOffset.
/**
 * Deliberately corrupts a batch by overwriting one entry in the offset vector of its
 * first column, which is cast to VarCharVector. Test-only helper for simulating
 * corruption; never do this in production code.
 *
 * @param batch the row set whose first column is modified
 * @param index position in the offset vector to overwrite
 * @param bogusValue value written at that position
 */
public void zapOffset(SingleRowSet batch, int index, int bogusValue) {
  // Reach the first column's vector and treat it as a VarChar vector.
  final VarCharVector firstColumn =
      (VarCharVector) batch.vectorAccessible().iterator().next().getValueVector();

  // Stomp on the chosen offset entry.
  final UInt4Vector offsets = firstColumn.getOffsetVector();
  offsets.getMutator().set(index, bogusValue);
}
Aggregations