use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class TestBatchValidator method zapOffset.
public void zapOffset(SingleRowSet batch, int index, int bogusValue) {
  // Here we are evil: stomp on an offset to simulate corruption.
  // Don't do this in real code!
  VectorAccessible va = batch.vectorAccessible();
  ValueVector v = va.iterator().next().getValueVector();
  VarCharVector vc = (VarCharVector) v;
  UInt4Vector ov = vc.getOffsetVector();
  ov.getMutator().set(index, bogusValue);
}
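A minimal sketch of how such a corruption helper pairs with BatchValidator, modeled on the tests in this file; the schema, row values, and expected outcome are illustrative assumptions, not a test copied from the source:

@Test
public void testZappedOffsetIsReported() {
  BatchSchema schema = new SchemaBuilder()
      .add("a", MinorType.VARCHAR)
      .build();
  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .addRow("fred")
      .addRow("barney")
      .build();
  // Point offset 1 far past the end of the data buffer.
  zapOffset(batch, 1, 100_000);
  BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
  validator.validate();
  // The stomped offset should surface as at least one validation error.
  assertFalse(validator.errors().isEmpty());
  batch.clear();
}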
use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class TestBatchValidator method testRepeatedBadArrayOffset.
@Test
public void testRepeatedBadArrayOffset() {
  BatchSchema schema = new SchemaBuilder()
      .add("a", MinorType.VARCHAR, DataMode.REPEATED)
      .build();
  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .addRow((Object) strArray())
      .addRow((Object) strArray("fred", "barney", "wilma"))
      .addRow((Object) strArray("dino"))
      .build();
  VectorAccessible va = batch.vectorAccessible();
  ValueVector v = va.iterator().next().getValueVector();
  RepeatedVarCharVector vc = (RepeatedVarCharVector) v;
  UInt4Vector ov = vc.getOffsetVector();
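  // The three rows above produce the offset vector [0, 0, 3, 4]: an empty array,
  // then three strings, then one string. Overwriting index 3 with 1 yields
  // [0, 0, 3, 1], so offset 3 is smaller than offset 2; that inversion is the
  // "Decreasing offsets" condition asserted below.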
  ov.getMutator().set(3, 1);
  BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
  validator.validate();
  List<String> errors = validator.errors();
  assertEquals(1, errors.size());
  assertTrue(errors.get(0).contains("Decreasing offsets"));
  batch.clear();
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class TestOutputBatchSize method testSizerRepeatedRepeatedList.
@Test
public void testSizerRepeatedRepeatedList() throws Exception {
  List<String> inputJsonBatches = Lists.newArrayList();
  StringBuilder batchString = new StringBuilder();
  StringBuilder newString = new StringBuilder();
  newString.append("[ [[1,2,3,4], [5,6,7,8]], [[1,2,3,4], [5,6,7,8]] ]");
  numRows = 9;
  batchString.append("[");
  // Nine rows with trailing commas in the loop, plus one final row below:
  // ten rows in total, matching the valueCount assertion further down.
  for (int i = 0; i < numRows; i++) {
    batchString.append("{\"c\" : " + newString);
    batchString.append("},");
  }
  batchString.append("{\"c\" : " + newString);
  batchString.append("}");
  batchString.append("]");
  inputJsonBatches.add(batchString.toString());
  // Create a dummy scan batch to figure out the size.
  RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(), fragContext,
      getReaderListForJsonBatches(inputJsonBatches, fragContext));
  VectorAccessible va = new BatchIterator(scanBatch).iterator().next();
  RecordBatchSizer sizer = new RecordBatchSizer(va);
  assertEquals(1, sizer.columns().size());
  RecordBatchSizer.ColumnSize column = sizer.columns().get("c");
  assertNotNull(column);
  /**
   * Expected sizer results for this batch:
   * stdDataSize: 8*10*10*10 = 8000, stdNetSize: 8*10*10*10 + 8*10*10 + 8*10 + 4 = 8884,
   * dataSizePerEntry: 16*8 = 128, netSizePerEntry: 16*8 + 4*4 + 2*4 + 4 = 156,
   * totalDataSize: 128*10 = 1280, totalNetSize: 156*10 = 1560, valueCount: 10,
   * elementCount: 20, estElementCountPerArray: 2, isVariableWidth: false
   */
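  // How netSizePerEntry = 156 breaks down per row (inferred from the assertions
  // below; this per-level reading of the layout is ours, not the original comment):
  //   data:              16 BIGINT values x 8 bytes = 128
  //   innermost offsets:  4 inner arrays  x 4 bytes =  16
  //   middle offsets:     2 middle arrays x 4 bytes =   8
  //   outer offset:       1 entry per row x 4 bytes =   4
  //   total                                         = 156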
  assertEquals(8000, column.getStdDataSizePerEntry());
  assertEquals(8884, column.getStdNetSizePerEntry());
  assertEquals(128, column.getDataSizePerEntry());
  assertEquals(156, column.getNetSizePerEntry());
  assertEquals(1280, column.getTotalDataSize());
  assertEquals(1560, column.getTotalNetSize());
  assertEquals(10, column.getValueCount());
  assertEquals(20, column.getElementCount());
  assertEquals(2, column.getCardinality(), 0.01);
  assertFalse(column.isVariableWidth());
  final int testRowCount = 1000;
  final int testRowCountPowerTwo = 2048;
  for (VectorWrapper<?> vw : va) {
    ValueVector v = vw.getValueVector();
    v.clear();
    RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());

    // Allocation rounds up to the nearest power of two.
    colSize.allocateVector(v, testRowCount);

    // The offset vector of the delegate vector, i.e. the outer array,
    // should have room for the row count.
    UInt4Vector offsetVector = ((RepeatedListVector) v).getOffsetVector();
    assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());

    // Get the data vector of the delegate vector. This is a repeated list again.
    ValueVector dataVector = ((RepeatedListVector) v).getDataVector();

    // The offset vector of the inner repeated list should have
    // row count * 2 values.
    offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
    assertEquals((Integer.highestOneBit(testRowCount * 2) << 1), offsetVector.getValueCapacity());

    // The inner repeated list itself should have row count * 2 values,
    // minus 1 for the offset-vector adjustment.
    ValueVector innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
    assertEquals((Integer.highestOneBit((testRowCount * 2) << 1) - 1), dataVector.getValueCapacity());

    // The offset vector of the innermost vector should have
    // 2 (outer array cardinality) * 2 (inner array cardinality) * row count values.
    offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
    assertEquals((Integer.highestOneBit(testRowCount * 4) << 1), offsetVector.getValueCapacity());

    // The data vector of the innermost vector should likewise have
    // 2 (outer array cardinality) * 2 (inner array cardinality) * row count values.
    dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
    assertEquals(Integer.highestOneBit(testRowCount << 1) * 16, dataVector.getValueCapacity());
    v.clear();

    // Allocate for testRowCountPowerTwo - 1 rows: the offset vector then needs
    // exactly testRowCountPowerTwo entries, already a power of two, so no
    // further rounding occurs.
    colSize.allocateVector(v, testRowCountPowerTwo - 1);
    offsetVector = ((RepeatedListVector) v).getOffsetVector();
    assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
    dataVector = ((RepeatedListVector) v).getDataVector();
    offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
    assertEquals(testRowCountPowerTwo * 2, offsetVector.getValueCapacity());
    innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
    assertEquals(testRowCountPowerTwo * 2 - 1, dataVector.getValueCapacity());
    offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
    assertEquals(testRowCountPowerTwo * 4, offsetVector.getValueCapacity());
    dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
    assertEquals(testRowCountPowerTwo * 16, dataVector.getValueCapacity());
    v.clear();

    // MAX ROW COUNT: the same chain of checks at the maximum row count.
    colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
    offsetVector = ((RepeatedListVector) v).getOffsetVector();
    assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
    dataVector = ((RepeatedListVector) v).getDataVector();
    offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
    assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsetVector.getValueCapacity());
    innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
    assertEquals(ValueVector.MAX_ROW_COUNT * 2 - 1, dataVector.getValueCapacity());
    offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
    assertEquals(ValueVector.MAX_ROW_COUNT * 4, offsetVector.getValueCapacity());
    dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
    assertEquals(ValueVector.MAX_ROW_COUNT * 16, dataVector.getValueCapacity());
    v.clear();

    // MIN ROW COUNT: a zero-row allocation still reserves minimal capacity.
    colSize.allocateVector(v, 0);
    // The outer offset vector should have a single value.
    offsetVector = ((RepeatedListVector) v).getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
    dataVector = ((RepeatedListVector) v).getDataVector();
    offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
    // The offset vector of the innermost vector should have
    // 2 (outer array cardinality) * 1 values.
    offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT * 2, offsetVector.getValueCapacity());
    // The data vector of the innermost vector should have a single value.
    dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
    assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity());
    v.clear();
  }
}
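Every capacity assertion in the test above reduces to one rounding rule: an offset vector for n values needs n + 1 entries, and buffer allocations round up to powers of two, so the resulting capacity is the smallest power of two strictly greater than n. A self-contained sketch of that rule (an illustration of the arithmetic, not a Drill API):

// Smallest power of two strictly greater than n; this is the
// Integer.highestOneBit(n) << 1 pattern the assertions above use.
static int offsetCapacityFor(int n) {
  return Integer.highestOneBit(n) << 1;
}

// offsetCapacityFor(1000) == 1024
// offsetCapacityFor(2047) == 2048  (why the test allocates testRowCountPowerTwo - 1)
// offsetCapacityFor(2048) == 4096  (an exact power of two would double)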
use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class DrillTestWrapper method addToCombinedVectorResults.
/**
 * Add to the result vectors and compare the batch schema against the expected schema
 * while iterating batches.
 * @param batches the batches to iterate
 * @param expectedSchema the schema each batch should have; a SchemaChangeException
 *        is thrown if a batch with a different schema is encountered
 * @param expectedBatchSize if non-null, the upper bound asserted on each batch's net size
 * @param expectedNumBatches if non-null, the expected minimum number of batches
 *        (at most twice this number is accepted)
 * @param combinedVectors the map of vectors that accumulates the values while
 *        iterating the batches
 *
 * @return number of batches
 * @throws SchemaChangeException
 * @throws UnsupportedEncodingException
 */
public static int addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema,
    Long expectedBatchSize, Integer expectedNumBatches, Map<String, List<Object>> combinedVectors)
    throws SchemaChangeException, UnsupportedEncodingException {
  // TODO - this does not handle schema changes
  int numBatch = 0;
  long totalRecords = 0;
  BatchSchema schema = null;
  for (VectorAccessible loader : batches) {
    numBatch++;
    if (expectedSchema != null) {
      if (!expectedSchema.isEquivalent(loader.getSchema())) {
        throw new SchemaChangeException(String.format(
            "Batch schema does not match expected schema\nActual schema: %s. Expected schema: %s",
            loader.getSchema(), expectedSchema));
      }
    }
    if (expectedBatchSize != null) {
      RecordBatchSizer sizer = new RecordBatchSizer(loader);
      // Not checking actualSize, as the accounting is not correct when we
      // split and transfer ownership across operators.
      Assert.assertTrue(sizer.netSize() <= expectedBatchSize);
    }
    // TODO - loading no longer throws SchemaChangeException, so the throws
    // clause above could be checked and cleaned up.
    if (schema == null) {
      schema = loader.getSchema();
      for (MaterializedField mf : schema) {
        combinedVectors.put(SchemaPath.getSimplePath(mf.getName()).toExpr(), new ArrayList<>());
      }
    } else {
      // TODO - actually handle schema changes. This block only re-reads the schema
      // to get the SelectionVectorMode of the current batch; the null check above
      // ensures the vector map is built only once. A real fix may need to add new
      // vectors and null-fill previous batches, and decide whether the distinction
      // between null and non-existence matters.
      schema = loader.getSchema();
    }
    logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
    totalRecords += loader.getRecordCount();
    for (VectorWrapper<?> w : loader) {
      String field = SchemaPath.getSimplePath(w.getField().getName()).toExpr();
      ValueVector[] vectors;
      if (w.isHyper()) {
        vectors = w.getValueVectors();
      } else {
        vectors = new ValueVector[] { w.getValueVector() };
      }
      SelectionVector2 sv2 = null;
      SelectionVector4 sv4 = null;
      switch (schema.getSelectionVectorMode()) {
        case TWO_BYTE:
          sv2 = loader.getSelectionVector2();
          break;
        case FOUR_BYTE:
          sv4 = loader.getSelectionVector4();
          break;
      }
      if (sv4 != null) {
        for (int j = 0; j < sv4.getCount(); j++) {
          // A four-byte selection vector entry packs the batch index into the
          // upper 16 bits and the record index into the lower 16 bits.
          int complexIndex = sv4.get(j);
          int batchIndex = complexIndex >> 16;
          int recordIndexInBatch = complexIndex & 65535;
          Object obj = vectors[batchIndex].getAccessor().getObject(recordIndexInBatch);
          if (obj instanceof Text) {
            obj = obj.toString();
          }
          combinedVectors.get(field).add(obj);
        }
      } else {
        for (ValueVector vv : vectors) {
          for (int j = 0; j < loader.getRecordCount(); j++) {
            int index = (sv2 != null) ? sv2.getIndex(j) : j;
            Object obj = vv.getAccessor().getObject(index);
            if (obj instanceof Text) {
              obj = obj.toString();
            }
            combinedVectors.get(field).add(obj);
          }
        }
      }
    }
  }
  if (expectedNumBatches != null) {
    // Because of how much memory value vectors actually take (buffer doubling),
    // predicting the exact number of batches would require complex math. Instead,
    // check that the number of batches is at least the expected minimum and no
    // more than twice that.
    Assert.assertTrue(numBatch >= expectedNumBatches);
    Assert.assertTrue(numBatch <= (2 * expectedNumBatches));
  }
  return numBatch;
}
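The FOUR_BYTE branch above depends on the composite layout of an SV4 entry. A standalone sketch of that decoding (the class name is ours; the bit layout follows the shifts used in the method):

// Decode one four-byte selection vector entry: the upper 16 bits select the
// batch within a hyper-batch, the lower 16 bits select the record in that batch.
final class Sv4Index {
  final int batchIndex;
  final int recordIndexInBatch;

  Sv4Index(int complexIndex) {
    this.batchIndex = complexIndex >> 16;
    this.recordIndexInBatch = complexIndex & 0xFFFF;  // same as & 65535 above
  }
}

// Example: 0x0003002A decodes to batch 3, record 42.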
use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.
the class TestPartitionSender method testThreadsHelper.
/**
 * Core of the test: generates the work unit, creates a partition sender for each
 * hash-partition-sender fragment, and verifies the thread count, batch
 * distribution, value distribution, and stats merging.
 * @param hashToRandomExchange
 * @param drillbitContext
 * @param options
 * @param incoming
 * @param registry
 * @param planReader
 * @param planningSet
 * @param rootFragment
 * @param expectedThreadsCount
 * @throws Exception
 */
private void testThreadsHelper(HashToRandomExchange hashToRandomExchange, DrillbitContext drillbitContext,
    OptionList options, RecordBatch incoming, FunctionImplementationRegistry registry,
    PhysicalPlanReader planReader, PlanningSet planningSet, Fragment rootFragment,
    int expectedThreadsCount) throws Exception {
  final QueryContextInformation queryContextInfo = Utilities.createQueryContextInfo(
      "dummySchemaName", "938ea2d9-7cb9-4baf-9414-a5a0b7777e8e");
  final QueryWorkUnit qwu = PARALLELIZER.generateWorkUnit(options, drillbitContext.getEndpoint(),
      QueryId.getDefaultInstance(), drillbitContext.getBits(), rootFragment, USER_SESSION, queryContextInfo);
  qwu.applyPlan(planReader);
  final List<MinorFragmentEndpoint> mfEndPoints =
      PhysicalOperatorUtil.getIndexOrderedEndpoints(Lists.newArrayList(drillbitContext.getBits()));
  for (PlanFragment planFragment : qwu.getFragments()) {
    if (!planFragment.getFragmentJson().contains("hash-partition-sender")) {
      continue;
    }
    MockPartitionSenderRootExec partitionSenderRootExec = null;
    FragmentContextImpl context = null;
    try {
      context = new FragmentContextImpl(drillbitContext, planFragment, null, registry);
      context.setExecutorState(new MockExecutorState());
      final int majorFragmentId = planFragment.getHandle().getMajorFragmentId();
      final HashPartitionSender partSender = new HashPartitionSender(majorFragmentId,
          hashToRandomExchange, hashToRandomExchange.getExpression(), mfEndPoints);
      partitionSenderRootExec = new MockPartitionSenderRootExec(context, incoming, partSender);
      assertEquals("Number of threads calculated", expectedThreadsCount,
          partitionSenderRootExec.getNumberPartitions());
      partitionSenderRootExec.createPartitioner();
      final PartitionerDecorator partDecor = partitionSenderRootExec.getPartitioner();
      assertNotNull(partDecor);
      List<Partitioner> partitioners = partDecor.getPartitioners();
      assertNotNull(partitioners);
      final int actualThreads = Math.min(DRILLBITS_COUNT, expectedThreadsCount);
      assertEquals("Number of partitioners", actualThreads, partitioners.size());
      for (int i = 0; i < mfEndPoints.size(); i++) {
        assertNotNull("PartitionOutgoingBatch", partDecor.getOutgoingBatches(i));
      }
      // Check the distribution of PartitionOutgoingBatch: it should be even,
      // with per-partitioner counts differing by at most one.
      boolean isFirst = true;
      int prevBatchCountSize = 0;
      int batchCountSize = 0;
      for (Partitioner part : partitioners) {
        @SuppressWarnings("unchecked")
        final List<PartitionOutgoingBatch> outBatch = (List<PartitionOutgoingBatch>) part.getOutgoingBatches();
        batchCountSize = outBatch.size();
        if (!isFirst) {
          assertTrue(Math.abs(batchCountSize - prevBatchCountSize) <= 1);
        } else {
          isFirst = false;
        }
        prevBatchCountSize = batchCountSize;
      }
      partitionSenderRootExec.getStats().startProcessing();
      try {
        partDecor.partitionBatch(incoming);
      } finally {
        partitionSenderRootExec.getStats().stopProcessing();
      }
      if (actualThreads == 1) {
        assertEquals("With a single thread, parent and child waitNanos should match",
            partitioners.get(0).getStats().getWaitNanos(),
            partitionSenderRootExec.getStats().getWaitNanos());
      }
      // Test the value distribution.
      partitioners = partDecor.getPartitioners();
      isFirst = true;
      // Since we feed a fake NullVector, the distribution is skewed: all
      // records land in the first outgoing batch.
      for (Partitioner part : partitioners) {
        @SuppressWarnings("unchecked")
        final List<PartitionOutgoingBatch> outBatches = (List<PartitionOutgoingBatch>) part.getOutgoingBatches();
        for (PartitionOutgoingBatch partOutBatch : outBatches) {
          final int recordCount = ((VectorAccessible) partOutBatch).getRecordCount();
          if (isFirst) {
            assertEquals("RecordCount", 100, recordCount);
            isFirst = false;
          } else {
            assertEquals("RecordCount", 0, recordCount);
          }
        }
      }
      // Test exceptions within threads and stats merging.
      partitionSenderRootExec.getStats().startProcessing();
      try {
        partDecor.executeMethodLogic(new InjectExceptionTest());
        fail("executeMethodLogic should throw an exception.");
      } catch (ExecutionException e) {
        final OperatorProfile.Builder oPBuilder = OperatorProfile.newBuilder();
        partitionSenderRootExec.getStats().addAllMetrics(oPBuilder);
        final List<MetricValue> metrics = oPBuilder.getMetricList();
        for (MetricValue metric : metrics) {
          if (Metric.BYTES_SENT.metricId() == metric.getMetricId()) {
            assertEquals("Should add metricValue irrespective of the exception",
                5 * actualThreads, metric.getLongValue());
          }
          if (Metric.SENDING_THREADS_COUNT.metricId() == metric.getMetricId()) {
            assertEquals(actualThreads, metric.getLongValue());
          }
        }
        assertTrue(e.getCause() instanceof IOException);
        assertEquals(actualThreads - 1, e.getCause().getSuppressed().length);
      } finally {
        partitionSenderRootExec.getStats().stopProcessing();
      }
    } finally {
      // Cleanup.
      partitionSenderRootExec.close();
      context.close();
    }
  }
}
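The distribution check in this test asserts that outgoing batches are spread across partitioners with counts differing by at most one, which is exactly what a round-robin assignment produces. A small self-contained sketch of that invariant (our own illustration, not Drill code):

// Assign n outgoing batches to k partitioners round-robin; the resulting
// counts differ by at most one, matching the assertion in the test above.
static int[] roundRobinCounts(int n, int k) {
  int[] counts = new int[k];
  for (int i = 0; i < n; i++) {
    counts[i % k]++;
  }
  return counts;
}

// Example: roundRobinCounts(10, 3) -> {4, 3, 3}; max - min == 1.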