use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class TestPartitionSender method testThreadsHelper.
/**
* Core of the testing
* @param hashToRandomExchange
* @param drillbitContext
* @param options
* @param incoming
* @param registry
* @param planReader
* @param planningSet
* @param rootFragment
* @param expectedThreadsCount
* @throws Exception
*/
private void testThreadsHelper(HashToRandomExchange hashToRandomExchange, DrillbitContext drillbitContext, OptionList options, RecordBatch incoming, FunctionImplementationRegistry registry, PhysicalPlanReader planReader, PlanningSet planningSet, Fragment rootFragment, int expectedThreadsCount) throws Exception {
final QueryContextInformation queryContextInfo = Utilities.createQueryContextInfo("dummySchemaName", "938ea2d9-7cb9-4baf-9414-a5a0b7777e8e");
final QueryWorkUnit qwu = PARALLELIZER.getFragments(options, drillbitContext.getEndpoint(), QueryId.getDefaultInstance(), drillbitContext.getBits(), rootFragment, USER_SESSION, queryContextInfo);
qwu.applyPlan(planReader);
final List<MinorFragmentEndpoint> mfEndPoints = PhysicalOperatorUtil.getIndexOrderedEndpoints(Lists.newArrayList(drillbitContext.getBits()));
for (PlanFragment planFragment : qwu.getFragments()) {
if (!planFragment.getFragmentJson().contains("hash-partition-sender")) {
continue;
}
MockPartitionSenderRootExec partionSenderRootExec = null;
FragmentContextImpl context = null;
try {
context = new FragmentContextImpl(drillbitContext, planFragment, null, registry);
final int majorFragmentId = planFragment.getHandle().getMajorFragmentId();
final HashPartitionSender partSender = new HashPartitionSender(majorFragmentId, hashToRandomExchange, hashToRandomExchange.getExpression(), mfEndPoints);
partionSenderRootExec = new MockPartitionSenderRootExec(context, incoming, partSender);
assertEquals("Number of threads calculated", expectedThreadsCount, partionSenderRootExec.getNumberPartitions());
partionSenderRootExec.createPartitioner();
final PartitionerDecorator partDecor = partionSenderRootExec.getPartitioner();
assertNotNull(partDecor);
List<Partitioner> partitioners = partDecor.getPartitioners();
assertNotNull(partitioners);
final int actualThreads = DRILLBITS_COUNT > expectedThreadsCount ? expectedThreadsCount : DRILLBITS_COUNT;
assertEquals("Number of partitioners", actualThreads, partitioners.size());
for (int i = 0; i < mfEndPoints.size(); i++) {
assertNotNull("PartitionOutgoingBatch", partDecor.getOutgoingBatches(i));
}
// check distribution of PartitionOutgoingBatch - should be even distribution
boolean isFirst = true;
int prevBatchCountSize = 0;
int batchCountSize = 0;
for (Partitioner part : partitioners) {
final List<PartitionOutgoingBatch> outBatch = (List<PartitionOutgoingBatch>) part.getOutgoingBatches();
batchCountSize = outBatch.size();
if (!isFirst) {
assertTrue(Math.abs(batchCountSize - prevBatchCountSize) <= 1);
} else {
isFirst = false;
}
prevBatchCountSize = batchCountSize;
}
partionSenderRootExec.getStats().startProcessing();
try {
partDecor.partitionBatch(incoming);
} finally {
partionSenderRootExec.getStats().stopProcessing();
}
if (actualThreads == 1) {
assertEquals("With single thread parent and child waitNanos should match", partitioners.get(0).getStats().getWaitNanos(), partionSenderRootExec.getStats().getWaitNanos());
}
// testing values distribution
partitioners = partDecor.getPartitioners();
isFirst = true;
// since we have fake Nullvector distribution is skewed
for (Partitioner part : partitioners) {
final List<PartitionOutgoingBatch> outBatches = (List<PartitionOutgoingBatch>) part.getOutgoingBatches();
for (PartitionOutgoingBatch partOutBatch : outBatches) {
final int recordCount = ((VectorAccessible) partOutBatch).getRecordCount();
if (isFirst) {
assertEquals("RecordCount", 100, recordCount);
isFirst = false;
} else {
assertEquals("RecordCount", 0, recordCount);
}
}
}
// test exceptions within threads
// test stats merging
partionSenderRootExec.getStats().startProcessing();
try {
partDecor.executeMethodLogic(new InjectExceptionTest());
fail("Should throw IOException here");
} catch (IOException ioe) {
final OperatorProfile.Builder oPBuilder = OperatorProfile.newBuilder();
partionSenderRootExec.getStats().addAllMetrics(oPBuilder);
final List<MetricValue> metrics = oPBuilder.getMetricList();
for (MetricValue metric : metrics) {
if (Metric.BYTES_SENT.metricId() == metric.getMetricId()) {
assertEquals("Should add metricValue irrespective of exception", 5 * actualThreads, metric.getLongValue());
}
if (Metric.SENDING_THREADS_COUNT.metricId() == metric.getMetricId()) {
assertEquals(actualThreads, metric.getLongValue());
}
}
assertEquals(actualThreads - 1, ioe.getSuppressed().length);
} finally {
partionSenderRootExec.getStats().stopProcessing();
}
} finally {
// cleanup
partionSenderRootExec.close();
context.close();
}
}
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class TestTraceOutputDump method testFilter.
@Test
public void testFilter() throws Throwable {
final DrillbitContext bitContext = mockDrillbitContext();
final UserClientConnection connection = Mockito.mock(UserClientConnection.class);
final PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(c);
final PhysicalPlan plan = reader.readPhysicalPlan(Files.toString(DrillFileUtils.getResourceAsFile("/trace/simple_trace.json"), Charsets.UTF_8));
final FunctionImplementationRegistry registry = new FunctionImplementationRegistry(c);
final FragmentContextImpl context = new FragmentContextImpl(bitContext, PlanFragment.getDefaultInstance(), connection, registry);
final SimpleRootExec exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));
while (exec.next()) {
}
exec.close();
if (context.getExecutorState().getFailureCause() != null) {
throw context.getExecutorState().getFailureCause();
}
assertTrue(!context.getExecutorState().isFailed());
final FragmentHandle handle = context.getHandle();
/* Form the file name to which the trace output will dump the record batches */
final String qid = QueryIdHelper.getQueryId(handle.getQueryId());
final int majorFragmentId = handle.getMajorFragmentId();
final int minorFragmentId = handle.getMinorFragmentId();
final String logLocation = c.getString(ExecConstants.TRACE_DUMP_DIRECTORY);
System.out.println("Found log location: " + logLocation);
final String filename = String.format("%s//%s_%d_%d_mock-scan", logLocation, qid, majorFragmentId, minorFragmentId);
System.out.println("File Name: " + filename);
final Configuration conf = new Configuration();
conf.set(FileSystem.FS_DEFAULT_NAME_KEY, c.getString(ExecConstants.TRACE_DUMP_FILESYSTEM));
final FileSystem fs = FileSystem.get(conf);
final Path path = new Path(filename);
assertTrue("Trace file does not exist", fs.exists(path));
final FSDataInputStream in = fs.open(path);
final VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(context.getAllocator());
wrap.readFromStream(in);
final VectorAccessible container = wrap.get();
/* Assert there are no selection vectors */
assertTrue(wrap.getSv2() == null);
/* Assert there is only one record */
assertTrue(container.getRecordCount() == 1);
/* Read the Integer value and ASSERT its Integer.MIN_VALUE */
final int value = (int) container.iterator().next().getValueVector().getAccessor().getObject(0);
assertTrue(value == Integer.MIN_VALUE);
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class TestBatchValidator method testVariableMissingLast.
@Test
public void testVariableMissingLast() {
BatchSchema schema = new SchemaBuilder().add("a", MinorType.VARCHAR).build();
SingleRowSet batch = fixture.rowSetBuilder(schema).addRow("x").addRow("y").addRow("z").build();
// Here we are evil: stomp on the last offset to simulate corruption.
// Don't do this in real code!
VectorAccessible va = batch.vectorAccessible();
ValueVector v = va.iterator().next().getValueVector();
VarCharVector vc = (VarCharVector) v;
UInt4Vector ov = vc.getOffsetVector();
assertTrue(ov.getAccessor().get(3) > 0);
ov.getMutator().set(3, 0);
// Validator should catch the error.
BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
validator.validate();
List<String> errors = validator.errors();
assertEquals(1, errors.size());
assertTrue(errors.get(0).contains("Decreasing offsets"));
batch.clear();
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class DumpCat method getBatchMetaInfo.
/* Get batch meta info : rows, selectedRows, dataSize */
private BatchMetaInfo getBatchMetaInfo(VectorAccessibleSerializable vcSerializable) {
final VectorAccessible vectorContainer = vcSerializable.get();
int rows;
int selectedRows;
int totalDataSize = 0;
rows = vectorContainer.getRecordCount();
selectedRows = rows;
if (vectorContainer.getSchema().getSelectionVectorMode() == SelectionVectorMode.TWO_BYTE) {
selectedRows = vcSerializable.getSv2().getCount();
}
for (final VectorWrapper w : vectorContainer) {
totalDataSize += w.getValueVector().getBufferSize();
}
return new BatchMetaInfo(rows, selectedRows, totalDataSize);
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class NestedLoopJoinBatch method setupWorker.
/**
* Method generates the runtime code needed for NLJ. Other than the setup method to set the input and output value
* vector references we implement three more methods
* 1. doEval() -> Evaluates if record from left side matches record from the right side
* 2. emitLeft() -> Project record from the left side
* 3. emitRight() -> Project record from the right side (which is a hyper container)
* @return the runtime generated class that implements the NestedLoopJoin interface
*/
private NestedLoopJoin setupWorker() throws IOException, ClassTransformationException, SchemaChangeException {
final CodeGenerator<NestedLoopJoin> nLJCodeGenerator = CodeGenerator.get(NestedLoopJoin.TEMPLATE_DEFINITION, context.getOptions());
nLJCodeGenerator.plainJavaCapable(true);
// Uncomment out this line to debug the generated code.
// nLJCodeGenerator.saveCodeForDebugging(true);
final ClassGenerator<NestedLoopJoin> nLJClassGenerator = nLJCodeGenerator.getRoot();
// generate doEval
final ErrorCollector collector = new ErrorCollectorImpl();
/*
Logical expression may contain fields from left and right batches. During code generation (materialization)
we need to indicate from which input field should be taken.
Non-equality joins can belong to one of below categories. For example:
1. Join on non-equality join predicates:
select * from t1 inner join t2 on (t1.c1 between t2.c1 AND t2.c2) AND (...)
2. Join with an OR predicate:
select * from t1 inner join t2 on on t1.c1 = t2.c1 OR t1.c2 = t2.c2
*/
Map<VectorAccessible, BatchReference> batches = ImmutableMap.<VectorAccessible, BatchReference>builder().put(left, new BatchReference("leftBatch", "leftIndex")).put(rightContainer, new BatchReference("rightContainer", "rightBatchIndex", "rightRecordIndexWithinBatch")).build();
LogicalExpression materialize = ExpressionTreeMaterializer.materialize(popConfig.getCondition(), batches, collector, context.getFunctionRegistry(), false, false);
if (collector.hasErrors()) {
throw new SchemaChangeException(String.format("Failure while trying to materialize join condition. Errors:\n %s.", collector.toErrorString()));
}
nLJClassGenerator.addExpr(new ReturnValueExpression(materialize), ClassGenerator.BlkCreateMode.FALSE);
// generate emitLeft
nLJClassGenerator.setMappingSet(emitLeftMapping);
JExpression outIndex = JExpr.direct("outIndex");
JExpression leftIndex = JExpr.direct("leftIndex");
int fieldId = 0;
int outputFieldId = 0;
if (leftSchema != null) {
// Set the input and output value vector references corresponding to the left batch
for (MaterializedField field : leftSchema) {
final TypeProtos.MajorType fieldType = field.getType();
// Add the vector to the output container
container.addOrGet(field);
JVar inVV = nLJClassGenerator.declareVectorValueSetupAndMember("leftBatch", new TypedFieldId(fieldType, false, fieldId));
JVar outVV = nLJClassGenerator.declareVectorValueSetupAndMember("outgoing", new TypedFieldId(fieldType, false, outputFieldId));
nLJClassGenerator.getEvalBlock().add(outVV.invoke("copyFromSafe").arg(leftIndex).arg(outIndex).arg(inVV));
nLJClassGenerator.rotateBlock();
fieldId++;
outputFieldId++;
}
}
// generate emitRight
fieldId = 0;
nLJClassGenerator.setMappingSet(emitRightMapping);
JExpression batchIndex = JExpr.direct("batchIndex");
JExpression recordIndexWithinBatch = JExpr.direct("recordIndexWithinBatch");
if (rightSchema != null) {
// Set the input and output value vector references corresponding to the right batch
for (MaterializedField field : rightSchema) {
final TypeProtos.MajorType inputType = field.getType();
TypeProtos.MajorType outputType;
// if join type is LEFT, make sure right batch output fields data mode is optional
if (popConfig.getJoinType() == JoinRelType.LEFT && inputType.getMode() == TypeProtos.DataMode.REQUIRED) {
outputType = Types.overrideMode(inputType, TypeProtos.DataMode.OPTIONAL);
} else {
outputType = inputType;
}
MaterializedField newField = MaterializedField.create(field.getName(), outputType);
container.addOrGet(newField);
JVar inVV = nLJClassGenerator.declareVectorValueSetupAndMember("rightContainer", new TypedFieldId(inputType, true, fieldId));
JVar outVV = nLJClassGenerator.declareVectorValueSetupAndMember("outgoing", new TypedFieldId(outputType, false, outputFieldId));
nLJClassGenerator.getEvalBlock().add(outVV.invoke("copyFromSafe").arg(recordIndexWithinBatch).arg(outIndex).arg(inVV.component(batchIndex)));
nLJClassGenerator.rotateBlock();
fieldId++;
outputFieldId++;
}
}
return context.getImplementationClass(nLJCodeGenerator);
}
Aggregations