Search in sources :

Example 21 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.

the class TestPartitionSender method testThreadsHelper.

/**
 * Core of the testing: for every plan fragment whose JSON mentions a hash partition sender,
 * builds a {@link MockPartitionSenderRootExec} and verifies the number of partitioner threads,
 * the spread of outgoing batches across partitioners, the record distribution after
 * partitioning, and that operator stats are still merged when a partitioner thread fails.
 *
 * @param hashToRandomExchange exchange whose expression is used to build the {@link HashPartitionSender} under test
 * @param drillbitContext supplies the endpoint and drillbit list for planning and backs each fragment context
 * @param options options handed to the parallelizer when the fragments are generated
 * @param incoming batch that is fed to the sender and partitioned
 * @param registry function registry passed to each fragment context
 * @param planReader plan reader applied to the generated query work unit
 * @param planningSet not read by this method; kept so existing callers keep compiling
 * @param rootFragment root fragment handed to the parallelizer
 * @param expectedThreadsCount expected result of {@code getNumberPartitions()} on the sender
 * @throws Exception if planning, partitioning or cleanup fails
 */
private void testThreadsHelper(HashToRandomExchange hashToRandomExchange, DrillbitContext drillbitContext, OptionList options, RecordBatch incoming, FunctionImplementationRegistry registry, PhysicalPlanReader planReader, PlanningSet planningSet, Fragment rootFragment, int expectedThreadsCount) throws Exception {
    final QueryContextInformation queryContextInfo = Utilities.createQueryContextInfo("dummySchemaName", "938ea2d9-7cb9-4baf-9414-a5a0b7777e8e");
    final QueryWorkUnit qwu = PARALLELIZER.getFragments(options, drillbitContext.getEndpoint(), QueryId.getDefaultInstance(), drillbitContext.getBits(), rootFragment, USER_SESSION, queryContextInfo);
    qwu.applyPlan(planReader);
    final List<MinorFragmentEndpoint> mfEndPoints = PhysicalOperatorUtil.getIndexOrderedEndpoints(Lists.newArrayList(drillbitContext.getBits()));
    for (PlanFragment planFragment : qwu.getFragments()) {
        // only fragments that actually contain the sender are of interest
        if (!planFragment.getFragmentJson().contains("hash-partition-sender")) {
            continue;
        }
        MockPartitionSenderRootExec partionSenderRootExec = null;
        FragmentContextImpl context = null;
        try {
            context = new FragmentContextImpl(drillbitContext, planFragment, null, registry);
            final int majorFragmentId = planFragment.getHandle().getMajorFragmentId();
            final HashPartitionSender partSender = new HashPartitionSender(majorFragmentId, hashToRandomExchange, hashToRandomExchange.getExpression(), mfEndPoints);
            partionSenderRootExec = new MockPartitionSenderRootExec(context, incoming, partSender);
            assertEquals("Number of threads calculated", expectedThreadsCount, partionSenderRootExec.getNumberPartitions());
            partionSenderRootExec.createPartitioner();
            final PartitionerDecorator partDecor = partionSenderRootExec.getPartitioner();
            assertNotNull(partDecor);
            List<Partitioner> partitioners = partDecor.getPartitioners();
            assertNotNull(partitioners);
            // the number of partitioners is capped by the number of drillbits
            final int actualThreads = Math.min(DRILLBITS_COUNT, expectedThreadsCount);
            assertEquals("Number of partitioners", actualThreads, partitioners.size());
            for (int i = 0; i < mfEndPoints.size(); i++) {
                assertNotNull("PartitionOutgoingBatch", partDecor.getOutgoingBatches(i));
            }
            // check distribution of PartitionOutgoingBatch - should be even distribution,
            // i.e. neighbouring partitioners differ by at most one outgoing batch
            boolean isFirst = true;
            int prevBatchCountSize = 0;
            int batchCountSize = 0;
            for (Partitioner part : partitioners) {
                final List<PartitionOutgoingBatch> outBatch = (List<PartitionOutgoingBatch>) part.getOutgoingBatches();
                batchCountSize = outBatch.size();
                if (!isFirst) {
                    assertTrue(Math.abs(batchCountSize - prevBatchCountSize) <= 1);
                } else {
                    isFirst = false;
                }
                prevBatchCountSize = batchCountSize;
            }
            partionSenderRootExec.getStats().startProcessing();
            try {
                partDecor.partitionBatch(incoming);
            } finally {
                partionSenderRootExec.getStats().stopProcessing();
            }
            if (actualThreads == 1) {
                assertEquals("With single thread parent and child waitNanos should match", partitioners.get(0).getStats().getWaitNanos(), partionSenderRootExec.getStats().getWaitNanos());
            }
            // testing values distribution
            partitioners = partDecor.getPartitioners();
            isFirst = true;
            // since we have fake Nullvector distribution is skewed:
            // the very first outgoing batch gets all 100 records, the rest get none
            for (Partitioner part : partitioners) {
                final List<PartitionOutgoingBatch> outBatches = (List<PartitionOutgoingBatch>) part.getOutgoingBatches();
                for (PartitionOutgoingBatch partOutBatch : outBatches) {
                    final int recordCount = ((VectorAccessible) partOutBatch).getRecordCount();
                    if (isFirst) {
                        assertEquals("RecordCount", 100, recordCount);
                        isFirst = false;
                    } else {
                        assertEquals("RecordCount", 0, recordCount);
                    }
                }
            }
            // test exceptions within threads
            // test stats merging
            partionSenderRootExec.getStats().startProcessing();
            try {
                partDecor.executeMethodLogic(new InjectExceptionTest());
                fail("Should throw IOException here");
            } catch (IOException ioe) {
                final OperatorProfile.Builder oPBuilder = OperatorProfile.newBuilder();
                partionSenderRootExec.getStats().addAllMetrics(oPBuilder);
                final List<MetricValue> metrics = oPBuilder.getMetricList();
                for (MetricValue metric : metrics) {
                    if (Metric.BYTES_SENT.metricId() == metric.getMetricId()) {
                        assertEquals("Should add metricValue irrespective of exception", 5 * actualThreads, metric.getLongValue());
                    }
                    if (Metric.SENDING_THREADS_COUNT.metricId() == metric.getMetricId()) {
                        assertEquals(actualThreads, metric.getLongValue());
                    }
                }
                // one exception is thrown, the ones from the remaining threads are attached as suppressed
                assertEquals(actualThreads - 1, ioe.getSuppressed().length);
            } finally {
                partionSenderRootExec.getStats().stopProcessing();
            }
        } finally {
            // cleanup: either reference is still null when its constructor threw, and an
            // unguarded close() would replace the real failure with a NullPointerException.
            // The nested finally makes sure the context is closed even if the sender's close() throws.
            try {
                if (partionSenderRootExec != null) {
                    partionSenderRootExec.close();
                }
            } finally {
                if (context != null) {
                    context.close();
                }
            }
        }
    }
}
Also used : HashPartitionSender(org.apache.drill.exec.physical.config.HashPartitionSender) VectorAccessible(org.apache.drill.exec.record.VectorAccessible) QueryWorkUnit(org.apache.drill.exec.work.QueryWorkUnit) FragmentContextImpl(org.apache.drill.exec.ops.FragmentContextImpl) IOException(java.io.IOException) PlanFragment(org.apache.drill.exec.proto.BitControl.PlanFragment) MinorFragmentEndpoint(org.apache.drill.exec.physical.MinorFragmentEndpoint) MinorFragmentEndpoint(org.apache.drill.exec.physical.MinorFragmentEndpoint) MetricValue(org.apache.drill.exec.proto.UserBitShared.MetricValue) List(java.util.List) OptionList(org.apache.drill.exec.server.options.OptionList) QueryContextInformation(org.apache.drill.exec.proto.BitControl.QueryContextInformation)

Example 22 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.

the class TestTraceOutputDump method testFilter.

/**
 * Runs the plan in {@code /trace/simple_trace.json} to completion, then reads back the file
 * written by the trace operator and checks that it holds a single record whose first column
 * is {@link Integer#MIN_VALUE} and that no two-byte selection vector was serialized.
 *
 * @throws Throwable the failure cause recorded by the fragment executor, or any setup/IO error
 */
@Test
public void testFilter() throws Throwable {
    final DrillbitContext bitContext = mockDrillbitContext();
    final UserClientConnection connection = Mockito.mock(UserClientConnection.class);
    final PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(c);
    final PhysicalPlan plan = reader.readPhysicalPlan(Files.toString(DrillFileUtils.getResourceAsFile("/trace/simple_trace.json"), Charsets.UTF_8));
    final FunctionImplementationRegistry registry = new FunctionImplementationRegistry(c);
    final FragmentContextImpl context = new FragmentContextImpl(bitContext, PlanFragment.getDefaultInstance(), connection, registry);
    final SimpleRootExec exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));
    try {
        // Drain the operator tree; the results themselves are not inspected here.
        while (exec.next()) {
        }
    } finally {
        // Release the operator tree even when next() throws.
        exec.close();
    }
    if (context.getExecutorState().getFailureCause() != null) {
        throw context.getExecutorState().getFailureCause();
    }
    assertTrue(!context.getExecutorState().isFailed());
    final FragmentHandle handle = context.getHandle();
    /* Form the file name to which the trace output will dump the record batches */
    final String qid = QueryIdHelper.getQueryId(handle.getQueryId());
    final int majorFragmentId = handle.getMajorFragmentId();
    final int minorFragmentId = handle.getMinorFragmentId();
    final String logLocation = c.getString(ExecConstants.TRACE_DUMP_DIRECTORY);
    System.out.println("Found log location: " + logLocation);
    final String filename = String.format("%s//%s_%d_%d_mock-scan", logLocation, qid, majorFragmentId, minorFragmentId);
    System.out.println("File Name: " + filename);
    final Configuration conf = new Configuration();
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, c.getString(ExecConstants.TRACE_DUMP_FILESYSTEM));
    final FileSystem fs = FileSystem.get(conf);
    final Path path = new Path(filename);
    assertTrue("Trace file does not exist", fs.exists(path));
    // try-with-resources: the trace stream used to stay open after the test, pass or fail.
    try (FSDataInputStream in = fs.open(path)) {
        final VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(context.getAllocator());
        wrap.readFromStream(in);
        final VectorAccessible container = wrap.get();
        /* Assert there are no selection vectors */
        assertTrue(wrap.getSv2() == null);
        /* Assert there is only one record */
        assertTrue(container.getRecordCount() == 1);
        /* Read the Integer value and ASSERT its Integer.MIN_VALUE */
        final int value = (int) container.iterator().next().getValueVector().getAccessor().getObject(0);
        assertTrue(value == Integer.MIN_VALUE);
    }
}
Also used : DrillbitContext(org.apache.drill.exec.server.DrillbitContext) Path(org.apache.hadoop.fs.Path) VectorAccessibleSerializable(org.apache.drill.exec.cache.VectorAccessibleSerializable) PhysicalPlan(org.apache.drill.exec.physical.PhysicalPlan) Configuration(org.apache.hadoop.conf.Configuration) VectorAccessible(org.apache.drill.exec.record.VectorAccessible) PhysicalPlanReader(org.apache.drill.exec.planner.PhysicalPlanReader) FragmentContextImpl(org.apache.drill.exec.ops.FragmentContextImpl) FragmentRoot(org.apache.drill.exec.physical.base.FragmentRoot) FragmentHandle(org.apache.drill.exec.proto.ExecProtos.FragmentHandle) SimpleRootExec(org.apache.drill.exec.physical.impl.SimpleRootExec) FileSystem(org.apache.hadoop.fs.FileSystem) UserClientConnection(org.apache.drill.exec.rpc.UserClientConnection) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FunctionImplementationRegistry(org.apache.drill.exec.expr.fn.FunctionImplementationRegistry) ExecTest(org.apache.drill.exec.ExecTest) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test)

Example 23 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.

the class TestBatchValidator method testVariableMissingLast.

/**
 * Corrupts the last offset of a three-row VARCHAR batch and checks that the
 * {@link BatchValidator} reports exactly one "Decreasing offsets" error.
 */
@Test
public void testVariableMissingLast() {
    BatchSchema schema = new SchemaBuilder().add("a", MinorType.VARCHAR).build();
    SingleRowSet batch = fixture.rowSetBuilder(schema).addRow("x").addRow("y").addRow("z").build();
    try {
        // Here we are evil: stomp on the last offset to simulate corruption.
        // Don't do this in real code!
        VectorAccessible va = batch.vectorAccessible();
        ValueVector v = va.iterator().next().getValueVector();
        VarCharVector vc = (VarCharVector) v;
        UInt4Vector ov = vc.getOffsetVector();
        // Sanity check: the entry we are about to overwrite really was non-zero.
        assertTrue(ov.getAccessor().get(3) > 0);
        ov.getMutator().set(3, 0);
        // Validator should catch the error.
        BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
        validator.validate();
        List<String> errors = validator.errors();
        assertEquals(1, errors.size());
        assertTrue(errors.get(0).contains("Decreasing offsets"));
    } finally {
        // Clear the row set even when an assertion above fails, so a failing test
        // does not also leave the batch un-cleared.
        batch.clear();
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) VectorAccessible(org.apache.drill.exec.record.VectorAccessible) BatchSchema(org.apache.drill.exec.record.BatchSchema) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RepeatedVarCharVector(org.apache.drill.exec.vector.RepeatedVarCharVector) VarCharVector(org.apache.drill.exec.vector.VarCharVector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) Test(org.junit.Test)

Example 24 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.

the class DumpCat method getBatchMetaInfo.

/*
 * Summarise one deserialized batch: total row count, number of selected rows and the
 * summed buffer size of all value vectors in the batch.
 */
private BatchMetaInfo getBatchMetaInfo(VectorAccessibleSerializable vcSerializable) {
    final VectorAccessible batch = vcSerializable.get();
    final int rowCount = batch.getRecordCount();
    /* With a two-byte selection vector the selected count comes from the SV2; otherwise every row counts. */
    final boolean hasSv2 = batch.getSchema().getSelectionVectorMode() == SelectionVectorMode.TWO_BYTE;
    final int selectedCount = hasSv2 ? vcSerializable.getSv2().getCount() : rowCount;
    int bufferBytes = 0;
    for (final VectorWrapper wrapper : batch) {
        bufferBytes += wrapper.getValueVector().getBufferSize();
    }
    return new BatchMetaInfo(rowCount, selectedCount, bufferBytes);
}
Also used : VectorAccessible(org.apache.drill.exec.record.VectorAccessible) VectorWrapper(org.apache.drill.exec.record.VectorWrapper)

Example 25 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.

the class NestedLoopJoinBatch method setupWorker.

/**
 * Method generates the runtime code needed for NLJ. Other than the setup method to set the input and output value
 * vector references we implement three more methods
 * 1. doEval() -> Evaluates if record from left side matches record from the right side
 * 2. emitLeft() -> Project record from the left side
 * 3. emitRight() -> Project record from the right side (which is a hyper container)
 *
 * As a side effect, every left and right field is added to the outgoing {@code container}
 * (left fields first, then right fields).
 *
 * @return the runtime generated class that implements the NestedLoopJoin interface
 * @throws SchemaChangeException if the join condition cannot be materialized against the input batches
 * @throws IOException presumably propagated from generating/compiling the class - the throwing call is not visible here
 * @throws ClassTransformationException presumably propagated from generating/compiling the class - the throwing call is not visible here
 */
private NestedLoopJoin setupWorker() throws IOException, ClassTransformationException, SchemaChangeException {
    final CodeGenerator<NestedLoopJoin> nLJCodeGenerator = CodeGenerator.get(NestedLoopJoin.TEMPLATE_DEFINITION, context.getOptions());
    nLJCodeGenerator.plainJavaCapable(true);
    // Uncomment this line to debug the generated code.
    // nLJCodeGenerator.saveCodeForDebugging(true);
    final ClassGenerator<NestedLoopJoin> nLJClassGenerator = nLJCodeGenerator.getRoot();
    // generate doEval
    final ErrorCollector collector = new ErrorCollectorImpl();
    /*
        Logical expression may contain fields from left and right batches. During code generation (materialization)
        we need to indicate from which input field should be taken.

        Non-equality joins can belong to one of below categories. For example:
        1. Join on non-equality join predicates:
        select * from t1 inner join t2 on (t1.c1 between t2.c1 AND t2.c2) AND (...)
        2. Join with an OR predicate:
        select * from t1 inner join t2 on t1.c1 = t2.c1 OR t1.c2 = t2.c2
     */
    // Map each input to the variable names the generated code uses to address it:
    // the left batch by a single index, the right container by a batch index plus a record index.
    Map<VectorAccessible, BatchReference> batches = ImmutableMap.<VectorAccessible, BatchReference>builder().put(left, new BatchReference("leftBatch", "leftIndex")).put(rightContainer, new BatchReference("rightContainer", "rightBatchIndex", "rightRecordIndexWithinBatch")).build();
    LogicalExpression materialize = ExpressionTreeMaterializer.materialize(popConfig.getCondition(), batches, collector, context.getFunctionRegistry(), false, false);
    // Materialization reports problems through the collector rather than by throwing, so check it explicitly.
    if (collector.hasErrors()) {
        throw new SchemaChangeException(String.format("Failure while trying to materialize join condition. Errors:\n %s.", collector.toErrorString()));
    }
    // The materialized join condition becomes the return value of the generated evaluation method.
    nLJClassGenerator.addExpr(new ReturnValueExpression(materialize), ClassGenerator.BlkCreateMode.FALSE);
    // generate emitLeft
    nLJClassGenerator.setMappingSet(emitLeftMapping);
    JExpression outIndex = JExpr.direct("outIndex");
    JExpression leftIndex = JExpr.direct("leftIndex");
    // fieldId indexes fields within the current input; outputFieldId indexes fields in the outgoing
    // container and keeps counting across both inputs, so right fields land after the left ones.
    int fieldId = 0;
    int outputFieldId = 0;
    if (leftSchema != null) {
        // Set the input and output value vector references corresponding to the left batch
        for (MaterializedField field : leftSchema) {
            final TypeProtos.MajorType fieldType = field.getType();
            // Add the vector to the output container
            container.addOrGet(field);
            JVar inVV = nLJClassGenerator.declareVectorValueSetupAndMember("leftBatch", new TypedFieldId(fieldType, false, fieldId));
            JVar outVV = nLJClassGenerator.declareVectorValueSetupAndMember("outgoing", new TypedFieldId(fieldType, false, outputFieldId));
            // Generated statement: outVV.copyFromSafe(leftIndex, outIndex, inVV)
            nLJClassGenerator.getEvalBlock().add(outVV.invoke("copyFromSafe").arg(leftIndex).arg(outIndex).arg(inVV));
            nLJClassGenerator.rotateBlock();
            fieldId++;
            outputFieldId++;
        }
    }
    // generate emitRight
    // Restart input numbering for the right side; outputFieldId is deliberately not reset.
    fieldId = 0;
    nLJClassGenerator.setMappingSet(emitRightMapping);
    JExpression batchIndex = JExpr.direct("batchIndex");
    JExpression recordIndexWithinBatch = JExpr.direct("recordIndexWithinBatch");
    if (rightSchema != null) {
        // Set the input and output value vector references corresponding to the right batch
        for (MaterializedField field : rightSchema) {
            final TypeProtos.MajorType inputType = field.getType();
            TypeProtos.MajorType outputType;
            // if join type is LEFT, make sure right batch output fields data mode is optional
            if (popConfig.getJoinType() == JoinRelType.LEFT && inputType.getMode() == TypeProtos.DataMode.REQUIRED) {
                outputType = Types.overrideMode(inputType, TypeProtos.DataMode.OPTIONAL);
            } else {
                outputType = inputType;
            }
            // The output field keeps the input's name but carries the (possibly relaxed) output type.
            MaterializedField newField = MaterializedField.create(field.getName(), outputType);
            container.addOrGet(newField);
            // NOTE(review): the 'true' flag presumably marks the right input as a hyper vector - confirm against TypedFieldId.
            JVar inVV = nLJClassGenerator.declareVectorValueSetupAndMember("rightContainer", new TypedFieldId(inputType, true, fieldId));
            JVar outVV = nLJClassGenerator.declareVectorValueSetupAndMember("outgoing", new TypedFieldId(outputType, false, outputFieldId));
            // Generated statement: outVV.copyFromSafe(recordIndexWithinBatch, outIndex, inVV[batchIndex])
            nLJClassGenerator.getEvalBlock().add(outVV.invoke("copyFromSafe").arg(recordIndexWithinBatch).arg(outIndex).arg(inVV.component(batchIndex)));
            nLJClassGenerator.rotateBlock();
            fieldId++;
            outputFieldId++;
        }
    }
    return context.getImplementationClass(nLJCodeGenerator);
}
Also used : VectorAccessible(org.apache.drill.exec.record.VectorAccessible) ErrorCollector(org.apache.drill.common.expression.ErrorCollector) MaterializedField(org.apache.drill.exec.record.MaterializedField) JExpression(com.sun.codemodel.JExpression) TypeProtos(org.apache.drill.common.types.TypeProtos) ErrorCollectorImpl(org.apache.drill.common.expression.ErrorCollectorImpl) ReturnValueExpression(org.apache.drill.exec.physical.impl.filter.ReturnValueExpression) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) BatchReference(org.apache.drill.exec.expr.BatchReference) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) JVar(com.sun.codemodel.JVar)

Aggregations

VectorAccessible (org.apache.drill.exec.record.VectorAccessible) 32, ValueVector (org.apache.drill.exec.vector.ValueVector) 17, Test (org.junit.Test) 14, UInt4Vector (org.apache.drill.exec.vector.UInt4Vector) 12, SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException) 8, RecordBatchSizer (org.apache.drill.exec.record.RecordBatchSizer) 8, RepeatedVarCharVector (org.apache.drill.exec.vector.RepeatedVarCharVector) 8, BatchSchema (org.apache.drill.exec.record.BatchSchema) 7, MaterializedField (org.apache.drill.exec.record.MaterializedField) 7, ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch) 6, RecordBatch (org.apache.drill.exec.record.RecordBatch) 6, VarCharVector (org.apache.drill.exec.vector.VarCharVector) 6, ExecTest (org.apache.drill.exec.ExecTest) 4, FragmentContextImpl (org.apache.drill.exec.ops.FragmentContextImpl) 4, SelectionVector2 (org.apache.drill.exec.record.selection.SelectionVector2) 4, SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4) 4, Text (org.apache.drill.exec.util.Text) 4, RepeatedListVector (org.apache.drill.exec.vector.complex.RepeatedListVector) 4, RepeatedValueVector (org.apache.drill.exec.vector.complex.RepeatedValueVector) 4, List (java.util.List) 3