Example 36 with RecordBatchLoader

Use of org.apache.drill.exec.record.RecordBatchLoader in project drill by apache.

From the class DrillTestWrapper, method compareUnorderedResults.

/**
   * Use this method only when you need to validate one query against another. If you are just
   * validating against a baseline file, use one of the simpler interfaces that will write the
   * validation query for you.
   *
   * @throws Exception if either query fails or the materialized results differ
   */
protected void compareUnorderedResults() throws Exception {
    RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
    List<QueryDataBatch> actual = Collections.emptyList();
    List<QueryDataBatch> expected = Collections.emptyList();
    List<Map<String, Object>> expectedRecords = new ArrayList<>();
    List<Map<String, Object>> actualRecords = new ArrayList<>();
    try {
        test(testOptionSettingQueries);
        actual = testRunAndReturn(queryType, query);
        checkNumBatches(actual);
        addTypeInfoIfMissing(actual.get(0), testBuilder);
        addToMaterializedResults(actualRecords, actual, loader);
        // if the baseline was not provided to the test builder directly, run the validation query;
        // this covers the cases where the baseline is stored in a file.
        if (baselineRecords == null) {
            test(baselineOptionSettingQueries);
            expected = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
            addToMaterializedResults(expectedRecords, expected, loader);
        } else {
            expectedRecords = baselineRecords;
        }
        compareResults(expectedRecords, actualRecords);
    } finally {
        cleanupBatches(actual, expected);
    }
}
Also used: QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) ArrayList(java.util.ArrayList) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap)
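The helper methods referenced above (addToMaterializedResults, compareResults) are internal to DrillTestWrapper. As a rough sketch of what materializing batches into row maps involves, built only from the loader calls visible in these examples (the class and method names are hypothetical, and the type returned by getField().getPath() varies across Drill versions, hence the String.valueOf):

import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.drill.exec.record.RecordBatchLoader;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.rpc.user.QueryDataBatch;

public class MaterializeSketch {
    // Load each batch into the loader, turn every record into a column -> value map,
    // then free both the loader's copy of the buffers and the batch itself.
    public static void materialize(List<Map<String, Object>> rows,
                                   List<QueryDataBatch> batches,
                                   RecordBatchLoader loader) throws Exception {
        for (QueryDataBatch batch : batches) {
            loader.load(batch.getHeader().getDef(), batch.getData());
            for (int r = 0; r < loader.getRecordCount(); r++) {
                final Map<String, Object> row = new TreeMap<>();
                for (VectorWrapper<?> vw : loader) {
                    row.put(String.valueOf(vw.getField().getPath()),
                            vw.getValueVector().getAccessor().getObject(r));
                }
                rows.add(row);
            }
            loader.clear();
            batch.release();
        }
    }
}

The TreeMap keeps columns in a stable order for comparison; the real helper may additionally normalize types and handle repeated fields.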

Example 37 with RecordBatchLoader

Use of org.apache.drill.exec.record.RecordBatchLoader in project drill by apache.

From the class DrillTestWrapper, method compareMergedOnHeapVectors.

public void compareMergedOnHeapVectors() throws Exception {
    RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
    List<QueryDataBatch> actual = Collections.emptyList();
    List<QueryDataBatch> expected = Collections.emptyList();
    Map<String, List<Object>> actualSuperVectors;
    Map<String, List<Object>> expectedSuperVectors;
    try {
        test(testOptionSettingQueries);
        actual = testRunAndReturn(queryType, query);
        checkNumBatches(actual);
        // To avoid extra work for test writers, types can optionally be inferred from the test query
        addTypeInfoIfMissing(actual.get(0), testBuilder);
        BatchIterator batchIter = new BatchIterator(actual, loader);
        actualSuperVectors = addToCombinedVectorResults(batchIter);
        batchIter.close();
        // if the baseline was not provided to the test builder directly, run the validation query;
        // this covers the cases where the baseline is stored in a file.
        if (baselineRecords == null) {
            test(baselineOptionSettingQueries);
            expected = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
            BatchIterator exBatchIter = new BatchIterator(expected, loader);
            expectedSuperVectors = addToCombinedVectorResults(exBatchIter);
            exBatchIter.close();
        } else {
            // data is built in the TestBuilder in row-major format, as provided by the user;
            // translate it here to the vectorized representation expected by the ordered comparison
            expectedSuperVectors = translateRecordListToHeapVectors(baselineRecords);
        }
        compareMergedVectors(expectedSuperVectors, actualSuperVectors);
    } catch (Exception e) {
        throw new Exception(e.getMessage() + "\nFor query: " + query, e);
    } finally {
        cleanupBatches(expected, actual);
    }
}
Also used: QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) ArrayList(java.util.ArrayList) List(java.util.List) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) UnsupportedEncodingException(java.io.UnsupportedEncodingException)
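translateRecordListToHeapVectors pivots the row-major baseline (one Map per record) into the column-major Map<String, List<Object>> shape that compareMergedVectors consumes. A minimal sketch of that pivot with plain Java collections, assuming every row carries the same columns (the real helper materializes actual heap vectors and handles missing fields):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class RowToColumnSketch {
    // Pivot row-major records (one Map per row) into one List per column.
    public static Map<String, List<Object>> toColumnMajor(List<Map<String, Object>> rows) {
        final Map<String, List<Object>> columns = new LinkedHashMap<>();
        for (Map<String, Object> row : rows) {
            for (Map.Entry<String, Object> e : row.entrySet()) {
                columns.computeIfAbsent(e.getKey(), k -> new ArrayList<>()).add(e.getValue());
            }
        }
        return columns;
    }
}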

Example 38 with RecordBatchLoader

Use of org.apache.drill.exec.record.RecordBatchLoader in project drill by apache.

From the class MergingRecordBatch, method innerNext.

@Override
public IterOutcome innerNext() {
    if (fragProviders.length == 0) {
        return IterOutcome.NONE;
    }
    boolean schemaChanged = false;
    if (prevBatchWasFull) {
        logger.debug("Outgoing vectors were full on last iteration");
        allocateOutgoing();
        outgoingPosition = 0;
        prevBatchWasFull = false;
    }
    if (!hasMoreIncoming) {
        logger.debug("next() was called after all values have been processed");
        outgoingPosition = 0;
        return IterOutcome.NONE;
    }
    // lazy initialization
    if (!hasRun) {
        // first iteration is always a schema change
        schemaChanged = true;
        // set up each (non-empty) incoming record batch
        final List<RawFragmentBatch> rawBatches = Lists.newArrayList();
        int p = 0;
        for (@SuppressWarnings("unused") final RawFragmentBatchProvider provider : fragProviders) {
            RawFragmentBatch rawBatch;
            // check if there is a batch in temp holder before calling getNext(), as it may have been used when building schema
            if (tempBatchHolder[p] != null) {
                rawBatch = tempBatchHolder[p];
                tempBatchHolder[p] = null;
            } else {
                try {
                    rawBatch = getNext(p);
                } catch (final IOException e) {
                    context.fail(e);
                    return IterOutcome.STOP;
                }
            }
            if (rawBatch == null && !context.shouldContinue()) {
                clearBatches(rawBatches);
                return IterOutcome.STOP;
            }
            assert rawBatch != null : "rawBatch is null although context.shouldContinue() == true";
            if (rawBatch.getHeader().getDef().getRecordCount() != 0) {
                rawBatches.add(rawBatch);
            } else {
                // save an empty batch to use for schema purposes; ignore the batch if it contains no fields, and thus no schema
                if (emptyBatch == null && rawBatch.getHeader().getDef().getFieldCount() != 0) {
                    emptyBatch = rawBatch;
                }
                try {
                    while ((rawBatch = getNext(p)) != null && rawBatch.getHeader().getDef().getRecordCount() == 0) {
                    // Do nothing
                    }
                    if (rawBatch == null && !context.shouldContinue()) {
                        clearBatches(rawBatches);
                        return IterOutcome.STOP;
                    }
                } catch (final IOException e) {
                    context.fail(e);
                    clearBatches(rawBatches);
                    return IterOutcome.STOP;
                }
                if (rawBatch != null) {
                    rawBatches.add(rawBatch);
                } else {
                    rawBatches.add(emptyBatch);
                }
            }
            p++;
        }
        // allocate the incoming record batch loaders
        senderCount = rawBatches.size();
        incomingBatches = new RawFragmentBatch[senderCount];
        batchOffsets = new int[senderCount];
        batchLoaders = new RecordBatchLoader[senderCount];
        for (int i = 0; i < senderCount; ++i) {
            incomingBatches[i] = rawBatches.get(i);
            batchLoaders[i] = new RecordBatchLoader(oContext.getAllocator());
        }
        // after this point all batches have moved to incomingBatches
        rawBatches.clear();
        int i = 0;
        for (final RawFragmentBatch batch : incomingBatches) {
            // initialize the incoming batchLoaders
            final UserBitShared.RecordBatchDef rbd = batch.getHeader().getDef();
            try {
                batchLoaders[i].load(rbd, batch.getBody());
            // TODO:  Clean:  DRILL-2933:  That load(...) no longer throws
            // SchemaChangeException, so check/clean catch clause below.
            } catch (final SchemaChangeException e) {
                logger.error("MergingReceiver failed to load record batch from remote host.  {}", e);
                context.fail(e);
                return IterOutcome.STOP;
            }
            batch.release();
            ++batchOffsets[i];
            ++i;
        }
        // Canonicalize each incoming batch, so that vectors are alphabetically sorted based on SchemaPath.
        for (final RecordBatchLoader loader : batchLoaders) {
            loader.canonicalize();
        }
        // Ensure all the incoming batches have the identical schema.
        if (!isSameSchemaAmongBatches(batchLoaders)) {
            context.fail(new SchemaChangeException("Incoming batches for merging receiver have different schemas!"));
            return IterOutcome.STOP;
        }
        // create the outgoing schema and vector container, and allocate the initial batch
        final SchemaBuilder bldr = BatchSchema.newBuilder().setSelectionVectorMode(BatchSchema.SelectionVectorMode.NONE);
        for (final VectorWrapper<?> v : batchLoaders[0]) {
            // add field to the output schema
            bldr.addField(v.getField());
            // allocate a new value vector
            outgoingContainer.addOrGet(v.getField());
        }
        allocateOutgoing();
        outgoingContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
        // generate code for merge operations (copy and compare)
        try {
            merger = createMerger();
        } catch (final SchemaChangeException e) {
            logger.error("Failed to generate code for MergingReceiver.  {}", e);
            context.fail(e);
            return IterOutcome.STOP;
        }
        // allocate the priority queue with the generated comparator
        this.pqueue = new PriorityQueue<>(fragProviders.length, new Comparator<Node>() {

            @Override
            public int compare(final Node node1, final Node node2) {
                final int leftIndex = (node1.batchId << 16) + node1.valueIndex;
                final int rightIndex = (node2.batchId << 16) + node2.valueIndex;
                try {
                    return merger.doEval(leftIndex, rightIndex);
                } catch (SchemaChangeException e) {
                    throw new UnsupportedOperationException(e);
                }
            }
        });
        // populate the priority queue with initial values
        for (int b = 0; b < senderCount; ++b) {
            while (batchLoaders[b] != null && batchLoaders[b].getRecordCount() == 0) {
                try {
                    final RawFragmentBatch batch = getNext(b);
                    incomingBatches[b] = batch;
                    if (batch != null) {
                        batchLoaders[b].load(batch.getHeader().getDef(), batch.getBody());
                    } else {
                        batchLoaders[b].clear();
                        batchLoaders[b] = null;
                        if (!context.shouldContinue()) {
                            return IterOutcome.STOP;
                        }
                    }
                } catch (IOException | SchemaChangeException e) {
                    context.fail(e);
                    return IterOutcome.STOP;
                }
            }
            if (batchLoaders[b] != null) {
                pqueue.add(new Node(b, 0));
            }
        }
        hasRun = true;
    // finished lazy initialization
    }
    while (!pqueue.isEmpty()) {
        // pop next value from pq and copy to outgoing batch
        final Node node = pqueue.peek();
        if (!copyRecordToOutgoingBatch(node)) {
            logger.debug("Outgoing vectors space is full; breaking");
            prevBatchWasFull = true;
        }
        pqueue.poll();
        if (node.valueIndex == batchLoaders[node.batchId].getRecordCount() - 1) {
            // reached the end of an incoming record batch
            RawFragmentBatch nextBatch;
            try {
                nextBatch = getNext(node.batchId);
                while (nextBatch != null && nextBatch.getHeader().getDef().getRecordCount() == 0) {
                    nextBatch = getNext(node.batchId);
                }
                assert nextBatch != null || inputCounts[node.batchId] == outputCounts[node.batchId] : String.format("Stream %d input count: %d output count %d", node.batchId, inputCounts[node.batchId], outputCounts[node.batchId]);
                if (nextBatch == null && !context.shouldContinue()) {
                    return IterOutcome.STOP;
                }
            } catch (final IOException e) {
                context.fail(e);
                return IterOutcome.STOP;
            }
            incomingBatches[node.batchId] = nextBatch;
            if (nextBatch == null) {
                // this sender's stream is exhausted; no more batches will arrive from it
                boolean allBatchesEmpty = true;
                for (final RawFragmentBatch batch : incomingBatches) {
                    // see if all batches are empty so we can return OK_* or NONE
                    if (batch != null) {
                        allBatchesEmpty = false;
                        break;
                    }
                }
                if (allBatchesEmpty) {
                    hasMoreIncoming = false;
                    break;
                }
                // other senders still have data; the exhausted stream's node was not re-queued and is
                // ignored in subsequent iterations.
                if (prevBatchWasFull) {
                    break;
                } else {
                    continue;
                }
            }
            final UserBitShared.RecordBatchDef rbd = incomingBatches[node.batchId].getHeader().getDef();
            try {
                batchLoaders[node.batchId].load(rbd, incomingBatches[node.batchId].getBody());
            // TODO:  Clean:  DRILL-2933:  That load(...) no longer throws
            // SchemaChangeException, so check/clean catch clause below.
            } catch (final SchemaChangeException ex) {
                context.fail(ex);
                return IterOutcome.STOP;
            }
            incomingBatches[node.batchId].release();
            batchOffsets[node.batchId] = 0;
            // add front value from batch[x] to priority queue
            if (batchLoaders[node.batchId].getRecordCount() != 0) {
                pqueue.add(new Node(node.batchId, 0));
            }
        } else {
            pqueue.add(new Node(node.batchId, node.valueIndex + 1));
        }
        if (prevBatchWasFull) {
            break;
        }
    }
    // set the value counts in the outgoing vectors
    for (final VectorWrapper<?> vw : outgoingContainer) {
        vw.getValueVector().getMutator().setValueCount(outgoingPosition);
    }
    if (pqueue.isEmpty()) {
        state = BatchState.DONE;
    }
    if (schemaChanged) {
        return IterOutcome.OK_NEW_SCHEMA;
    } else {
        return IterOutcome.OK;
    }
}
Also used: RawFragmentBatch(org.apache.drill.exec.record.RawFragmentBatch) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) IOException(java.io.IOException) MinorFragmentEndpoint(org.apache.drill.exec.physical.MinorFragmentEndpoint) Comparator(java.util.Comparator) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) RawFragmentBatchProvider(org.apache.drill.exec.record.RawFragmentBatchProvider) SchemaBuilder(org.apache.drill.exec.record.SchemaBuilder) UserBitShared(org.apache.drill.exec.proto.UserBitShared)
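Stripped of schema handling, codegen, and flow control, innerNext() is a k-way merge: one Node per sender sits in a priority queue, the smallest head is popped and copied to the outgoing batch, and that sender's next value is pushed back in. A self-contained sketch of the same pattern over sorted int arrays (the Node fields mirror the code above; the generated merger, batch reloading, and STOP handling are omitted):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class KWayMergeSketch {
    // One queue entry per input stream: which stream, and the cursor within it.
    static final class Node {
        final int batchId;
        final int valueIndex;
        Node(int batchId, int valueIndex) {
            this.batchId = batchId;
            this.valueIndex = valueIndex;
        }
    }

    public static List<Integer> mergeSorted(int[][] streams) {
        final PriorityQueue<Node> pqueue = new PriorityQueue<>(
            Comparator.comparingInt((Node n) -> streams[n.batchId][n.valueIndex]));
        // populate the priority queue with the front value of every non-empty stream
        for (int b = 0; b < streams.length; b++) {
            if (streams[b].length > 0) {
                pqueue.add(new Node(b, 0));
            }
        }
        final List<Integer> out = new ArrayList<>();
        while (!pqueue.isEmpty()) {
            final Node node = pqueue.poll();  // smallest head across all streams
            out.add(streams[node.batchId][node.valueIndex]);
            if (node.valueIndex + 1 < streams[node.batchId].length) {
                pqueue.add(new Node(node.batchId, node.valueIndex + 1));  // advance that stream
            }
        }
        return out;
    }
}

mergeSorted(new int[][] {{1, 4}, {2, 3, 5}}) yields [1, 2, 3, 4, 5]. MergingRecordBatch does the same thing, except each stream is a sender's sequence of record batches and the comparison is generated code (merger.doEval) over value vectors.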

Example 39 with RecordBatchLoader

Use of org.apache.drill.exec.record.RecordBatchLoader in project drill by apache.

From the class TestOptiqPlans, method testLogicalJsonScan.

@Test
public void testLogicalJsonScan() throws Exception {
    final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
    try (final Drillbit bit1 = new Drillbit(config, serviceSet);
        final DrillClient client = new DrillClient(config, serviceSet.getCoordinator())) {
        bit1.run();
        client.connect();
        final List<QueryDataBatch> results = client.runQuery(org.apache.drill.exec.proto.UserBitShared.QueryType.LOGICAL, Resources.toString(Resources.getResource("logical_json_scan.json"), Charsets.UTF_8));
        final RecordBatchLoader loader = new RecordBatchLoader(bit1.getContext().getAllocator());
        for (final QueryDataBatch b : results) {
            System.out.println(String.format("Got %d results", b.getHeader().getRowCount()));
            loader.load(b.getHeader().getDef(), b.getData());
            for (final VectorWrapper<?> vw : loader) {
                System.out.println(vw.getValueVector().getField().getPath());
                final ValueVector vv = vw.getValueVector();
                for (int i = 0; i < vv.getAccessor().getValueCount(); i++) {
                    final Object o = vv.getAccessor().getObject(i);
                    if (vv instanceof VarBinaryVector) {
                        final VarBinaryVector.Accessor x = ((VarBinaryVector) vv).getAccessor();
                        final VarBinaryHolder vbh = new VarBinaryHolder();
                        x.get(i, vbh);
                        System.out.printf("%d..%d", vbh.start, vbh.end);
                        System.out.println("[" + new String((byte[]) vv.getAccessor().getObject(i)) + "]");
                    } else {
                        System.out.println(vv.getAccessor().getObject(i));
                    }
                }
            }
            loader.clear();
            b.release();
        }
        client.close();
    }
}
Also used: RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) VarBinaryHolder(org.apache.drill.exec.expr.holders.VarBinaryHolder) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) VarBinaryVector(org.apache.drill.exec.vector.VarBinaryVector) ValueVector(org.apache.drill.exec.vector.ValueVector) QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) Drillbit(org.apache.drill.exec.server.Drillbit) RemoteServiceSet(org.apache.drill.exec.server.RemoteServiceSet) DrillClient(org.apache.drill.exec.client.DrillClient) ExecTest(org.apache.drill.exec.ExecTest) Test(org.junit.Test)
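The VarBinaryVector branch above reads values through a holder, which exposes the backing buffer plus [start, end) offsets rather than a materialized byte[]. A hedged sketch of that decode as a standalone helper, assuming the ByteBuf-style getBytes copy that DrillBuf inherits from Netty:

import org.apache.drill.exec.expr.holders.VarBinaryHolder;
import org.apache.drill.exec.vector.VarBinaryVector;

public class VarBinarySketch {
    // Copy the i-th value out of a VarBinaryVector through its holder; the holder
    // carries the backing buffer and the value's [start, end) byte offsets.
    public static byte[] read(VarBinaryVector vv, int i) {
        final VarBinaryHolder vbh = new VarBinaryHolder();
        vv.getAccessor().get(i, vbh);
        final byte[] bytes = new byte[vbh.end - vbh.start];
        vbh.buffer.getBytes(vbh.start, bytes);  // bulk copy out of the DrillBuf
        return bytes;
    }
}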

Example 40 with RecordBatchLoader

Use of org.apache.drill.exec.record.RecordBatchLoader in project drill by apache.

From the class TestSimpleFragmentRun, method runJSONScanPopFragment.

@Test
public void runJSONScanPopFragment() throws Exception {
    try (final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
        final Drillbit bit = new Drillbit(CONFIG, serviceSet);
        final DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator())) {
        // run query.
        bit.run();
        client.connect();
        final List<QueryDataBatch> results = client.runQuery(QueryType.PHYSICAL, Files.toString(FileUtils.getResourceAsFile("/physical_json_scan_test1.json"), Charsets.UTF_8).replace("#{TEST_FILE}", FileUtils.getResourceAsFile("/scan_json_test_1.json").toURI().toString()));
        // look at records
        final RecordBatchLoader batchLoader = new RecordBatchLoader(RootAllocatorFactory.newRoot(CONFIG));
        int recordCount = 0;
        for (int i = 0; i < results.size(); ++i) {
            final QueryDataBatch batch = results.get(i);
            if (i == 0) {
                assertTrue(batch.hasData());
            } else {
                assertFalse(batch.hasData());
                batch.release();
                continue;
            }
            assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData()));
            boolean firstColumn = true;
            // print headers.
            System.out.println("\n\n========NEW SCHEMA=========\n\n");
            for (final VectorWrapper<?> v : batchLoader) {
                if (firstColumn) {
                    firstColumn = false;
                } else {
                    System.out.print("\t");
                }
                System.out.print(v.getField().getPath());
                System.out.print("[");
                System.out.print(v.getField().getType().getMinorType());
                System.out.print("]");
            }
            System.out.println();
            for (int r = 0; r < batchLoader.getRecordCount(); r++) {
                boolean first = true;
                recordCount++;
                for (final VectorWrapper<?> v : batchLoader) {
                    if (first) {
                        first = false;
                    } else {
                        System.out.print("\t");
                    }
                    final ValueVector.Accessor accessor = v.getValueVector().getAccessor();
                    System.out.print(accessor.getObject(r));
                }
                if (!first) {
                    System.out.println();
                }
            }
            batchLoader.clear();
            batch.release();
        }
        assertEquals(2, recordCount);
    }
}
Also used: ValueVector(org.apache.drill.exec.vector.ValueVector) QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) Drillbit(org.apache.drill.exec.server.Drillbit) RemoteServiceSet(org.apache.drill.exec.server.RemoteServiceSet) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) DrillClient(org.apache.drill.exec.client.DrillClient) Test(org.junit.Test)
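Examples 39 and 40 share one skeleton: start an embedded Drillbit, connect a DrillClient, run a query, and drain the results through a RecordBatchLoader, clearing the loader and releasing each batch as you go. A condensed sketch of that skeleton; the DrillConfig argument stands in for the tests' config field, and the sample SQL against the bundled cp.`employee.json` workspace is an assumption:

import java.util.List;

import org.apache.drill.common.config.DrillConfig;
import org.apache.drill.exec.client.DrillClient;
import org.apache.drill.exec.proto.UserBitShared.QueryType;
import org.apache.drill.exec.record.RecordBatchLoader;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.rpc.user.QueryDataBatch;
import org.apache.drill.exec.server.Drillbit;
import org.apache.drill.exec.server.RemoteServiceSet;

public class EmbeddedQuerySketch {
    public static void run(DrillConfig config) throws Exception {
        try (RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
             Drillbit bit = new Drillbit(config, serviceSet);
             DrillClient client = new DrillClient(config, serviceSet.getCoordinator())) {
            bit.run();
            client.connect();
            final List<QueryDataBatch> results =
                client.runQuery(QueryType.SQL, "SELECT * FROM cp.`employee.json` LIMIT 5");
            final RecordBatchLoader loader = new RecordBatchLoader(bit.getContext().getAllocator());
            for (QueryDataBatch b : results) {
                loader.load(b.getHeader().getDef(), b.getData());
                for (VectorWrapper<?> vw : loader) {
                    for (int r = 0; r < loader.getRecordCount(); r++) {
                        System.out.println(vw.getValueVector().getAccessor().getObject(r));
                    }
                }
                loader.clear();  // release the loader's copy of the buffers
                b.release();     // release the incoming batch itself
            }
        }
    }
}

The clear()/release() pair matters: both hold direct memory, and the allocator checks for outstanding buffers when it closes.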

Aggregations

Classes most frequently used together with RecordBatchLoader across the indexed examples, with usage counts:

RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader): 70
QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch): 64
Test (org.junit.Test): 45
DrillClient (org.apache.drill.exec.client.DrillClient): 37
Drillbit (org.apache.drill.exec.server.Drillbit): 36
RemoteServiceSet (org.apache.drill.exec.server.RemoteServiceSet): 36
ValueVector (org.apache.drill.exec.vector.ValueVector): 34
VectorWrapper (org.apache.drill.exec.record.VectorWrapper): 17
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 8
BigIntVector (org.apache.drill.exec.vector.BigIntVector): 8
ExecTest (org.apache.drill.exec.ExecTest): 7
VarCharVector (org.apache.drill.exec.vector.VarCharVector): 6
DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint): 5
NullableVarCharVector (org.apache.drill.exec.vector.NullableVarCharVector): 5
ArrayList (java.util.ArrayList): 4
SchemaPath (org.apache.drill.common.expression.SchemaPath): 4
IOException (java.io.IOException): 3
VarBinaryHolder (org.apache.drill.exec.expr.holders.VarBinaryHolder): 3
BufferAllocator (org.apache.drill.exec.memory.BufferAllocator): 3
QueryData (org.apache.drill.exec.proto.UserBitShared.QueryData): 3