use of org.apache.drill.exec.record.RecordBatchLoader in project drill by apache.
the class DrillTestWrapper method compareUnorderedResults.
/**
 * Runs the test query, materializes its rows, and hands them to {@code compareResults} together
 * with the expected rows (presumably an order-insensitive comparison, going by the method name).
 *
 * Use this method only if necessary to validate one query against another. If you are just
 * validating against a baseline file, use one of the simpler interfaces that will write the
 * validation query for you.
 *
 * @throws Exception if running either query or comparing the results fails
 */
protected void compareUnorderedResults() throws Exception {
  final RecordBatchLoader batchLoader = new RecordBatchLoader(getAllocator());
  final List<Map<String, Object>> actualRows = new ArrayList<>();
  // Start out as empty lists so the finally block can always hand both to cleanupBatches.
  List<QueryDataBatch> actualBatches = Collections.emptyList();
  List<QueryDataBatch> baselineBatches = Collections.emptyList();
  try {
    test(testOptionSettingQueries);
    actualBatches = testRunAndReturn(queryType, query);
    checkNumBatches(actualBatches);
    addTypeInfoIfMissing(actualBatches.get(0), testBuilder);
    addToMaterializedResults(actualRows, actualBatches, batchLoader);

    final List<Map<String, Object>> expectedRows;
    if (baselineRecords != null) {
      // Baseline rows were supplied directly; use them as they are.
      expectedRows = baselineRecords;
    } else {
      // No baseline rows were supplied, so run the validation query to produce them.
      test(baselineOptionSettingQueries);
      baselineBatches = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
      expectedRows = new ArrayList<>();
      addToMaterializedResults(expectedRows, baselineBatches, batchLoader);
    }
    compareResults(expectedRows, actualRows);
  } finally {
    cleanupBatches(actualBatches, baselineBatches);
  }
}
use of org.apache.drill.exec.record.RecordBatchLoader in project drill by apache.
the class DrillTestWrapper method compareMergedOnHeapVectors.
/**
 * Runs the test query, combines its batches into one list of values per column, and compares
 * that against the expected columns via {@code compareMergedVectors}.
 *
 * The expected columns come either from running the validation query (when no baseline records
 * were supplied) or from translating the supplied row-major baseline records into column lists.
 *
 * @throws Exception wrapping any failure, with the test query appended to the message
 */
public void compareMergedOnHeapVectors() throws Exception {
  RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
  // Start out as empty lists so the finally block can always hand both to cleanupBatches.
  List<QueryDataBatch> actual = Collections.emptyList();
  List<QueryDataBatch> expected = Collections.emptyList();
  Map<String, List<Object>> actualSuperVectors;
  Map<String, List<Object>> expectedSuperVectors;
  try {
    test(testOptionSettingQueries);
    actual = testRunAndReturn(queryType, query);
    checkNumBatches(actual);
    // To avoid extra work for test writers, types can optionally be inferred from the test query
    addTypeInfoIfMissing(actual.get(0), testBuilder);
    BatchIterator batchIter = new BatchIterator(actual, loader);
    try {
      actualSuperVectors = addToCombinedVectorResults(batchIter);
    } finally {
      // Close even when combining fails, so the iterator is never left open.
      batchIter.close();
    }
    // No baseline records were supplied (this includes the cases where the baseline is stored
    // in a file), so run the validation query to produce the expected data.
    if (baselineRecords == null) {
      test(baselineOptionSettingQueries);
      expected = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
      BatchIterator exBatchIter = new BatchIterator(expected, loader);
      try {
        expectedSuperVectors = addToCombinedVectorResults(exBatchIter);
      } finally {
        exBatchIter.close();
      }
    } else {
      // data is built in the TestBuilder in a row major format as it is provided by the user
      // translate it here to vectorized, the representation expected by the ordered comparison
      expectedSuperVectors = translateRecordListToHeapVectors(baselineRecords);
    }
    compareMergedVectors(expectedSuperVectors, actualSuperVectors);
  } catch (Exception e) {
    // Re-throw with the query text so a failing test identifies which query was running.
    throw new Exception(e.getMessage() + "\nFor query: " + query, e);
  } finally {
    cleanupBatches(expected, actual);
  }
}
use of org.apache.drill.exec.record.RecordBatchLoader in project drill by apache.
the class MergingRecordBatch method innerNext.
/**
* Produces the next outgoing batch by merging records from the incoming fragment streams.
*
* The first call performs lazy initialization: it collects one starting batch per fragment
* provider, loads each into its own RecordBatchLoader, builds the outgoing container and schema
* from the first loader, generates the merger, and seeds the priority queue with record 0 of each
* stream that has data. Every call then repeatedly takes the head of the queue, copies it to the
* outgoing batch, and enqueues the following record of the same stream, until the outgoing batch
* is reported full or the queue is drained.
*
* @return NONE when there are no fragment providers or all input was consumed on an earlier call;
*         STOP when fetching or loading a batch fails, the incoming schemas differ, code
*         generation fails, or a stream ends while the context says not to continue;
*         OK_NEW_SCHEMA on the call that performed the lazy initialization; OK otherwise
*/
@Override
public IterOutcome innerNext() {
if (fragProviders.length == 0) {
return IterOutcome.NONE;
}
boolean schemaChanged = false;
// The previous call stopped because the outgoing vectors were full: start a fresh outgoing batch.
if (prevBatchWasFull) {
logger.debug("Outgoing vectors were full on last iteration");
allocateOutgoing();
outgoingPosition = 0;
prevBatchWasFull = false;
}
if (!hasMoreIncoming) {
logger.debug("next() was called after all values have been processed");
outgoingPosition = 0;
return IterOutcome.NONE;
}
// lazy initialization
if (!hasRun) {
// first iteration is always a schema change
schemaChanged = true;
// set up each (non-empty) incoming record batch
final List<RawFragmentBatch> rawBatches = Lists.newArrayList();
// p is the provider index; the loop variable itself is unused because batches are fetched by index.
int p = 0;
for (@SuppressWarnings("unused") final RawFragmentBatchProvider provider : fragProviders) {
RawFragmentBatch rawBatch;
// check if there is a batch in temp holder before calling getNext(), as it may have been used when building schema
if (tempBatchHolder[p] != null) {
rawBatch = tempBatchHolder[p];
tempBatchHolder[p] = null;
} else {
try {
rawBatch = getNext(p);
} catch (final IOException e) {
// NOTE(review): unlike the other early exits in this loop, this path does not call
// clearBatches(rawBatches) - confirm whether batches collected so far need clearing here.
context.fail(e);
return IterOutcome.STOP;
}
}
if (rawBatch == null && !context.shouldContinue()) {
clearBatches(rawBatches);
return IterOutcome.STOP;
}
assert rawBatch != null : "rawBatch is null although context.shouldContinue() == true";
if (rawBatch.getHeader().getDef().getRecordCount() != 0) {
rawBatches.add(rawBatch);
} else {
// save an empty batch to use for schema purposes. ignore batch if it contains no fields, and thus no schema
if (emptyBatch == null && rawBatch.getHeader().getDef().getFieldCount() != 0) {
emptyBatch = rawBatch;
}
try {
// skip over any further zero-record batches from this provider
while ((rawBatch = getNext(p)) != null && rawBatch.getHeader().getDef().getRecordCount() == 0) {
// Do nothing
}
if (rawBatch == null && !context.shouldContinue()) {
clearBatches(rawBatches);
return IterOutcome.STOP;
}
} catch (final IOException e) {
context.fail(e);
clearBatches(rawBatches);
return IterOutcome.STOP;
}
if (rawBatch != null) {
rawBatches.add(rawBatch);
} else {
// This provider only produced empty batches; use the saved empty batch in its place.
// NOTE(review): emptyBatch is still null here if no zero-record batch with fields has been
// seen so far - confirm that case cannot occur, since a null entry would be dereferenced below.
rawBatches.add(emptyBatch);
}
}
p++;
}
// allocate the incoming record batch loaders
senderCount = rawBatches.size();
incomingBatches = new RawFragmentBatch[senderCount];
batchOffsets = new int[senderCount];
batchLoaders = new RecordBatchLoader[senderCount];
for (int i = 0; i < senderCount; ++i) {
incomingBatches[i] = rawBatches.get(i);
batchLoaders[i] = new RecordBatchLoader(oContext.getAllocator());
}
// after this point all batches have moved to incomingBatches
rawBatches.clear();
int i = 0;
for (final RawFragmentBatch batch : incomingBatches) {
// initialize the incoming batchLoaders
final UserBitShared.RecordBatchDef rbd = batch.getHeader().getDef();
try {
batchLoaders[i].load(rbd, batch.getBody());
// TODO: Clean: DRILL-2933: That load(...) no longer throws
// SchemaChangeException, so check/clean catch clause below.
} catch (final SchemaChangeException e) {
logger.error("MergingReceiver failed to load record batch from remote host. {}", e);
context.fail(e);
return IterOutcome.STOP;
}
// the loader has been given the batch body, so the raw batch is released here
batch.release();
++batchOffsets[i];
++i;
}
// Canonicalize each incoming batch, so that vectors are alphabetically sorted based on SchemaPath.
for (final RecordBatchLoader loader : batchLoaders) {
loader.canonicalize();
}
// Ensure all the incoming batches have the identical schema.
if (!isSameSchemaAmongBatches(batchLoaders)) {
context.fail(new SchemaChangeException("Incoming batches for merging receiver have different schemas!"));
return IterOutcome.STOP;
}
// create the outgoing schema and vector container, and allocate the initial batch
final SchemaBuilder bldr = BatchSchema.newBuilder().setSelectionVectorMode(BatchSchema.SelectionVectorMode.NONE);
for (final VectorWrapper<?> v : batchLoaders[0]) {
// add field to the output schema
bldr.addField(v.getField());
// allocate a new value vector
outgoingContainer.addOrGet(v.getField());
}
allocateOutgoing();
outgoingContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
// generate code for merge operations (copy and compare)
try {
merger = createMerger();
} catch (final SchemaChangeException e) {
logger.error("Failed to generate code for MergingReceiver. {}", e);
context.fail(e);
return IterOutcome.STOP;
}
// allocate the priority queue with the generated comparator
this.pqueue = new PriorityQueue<>(fragProviders.length, new Comparator<Node>() {
@Override
public int compare(final Node node1, final Node node2) {
// Each node is passed to the merger as a single int: (batchId << 16) + valueIndex.
final int leftIndex = (node1.batchId << 16) + node1.valueIndex;
final int rightIndex = (node2.batchId << 16) + node2.valueIndex;
try {
return merger.doEval(leftIndex, rightIndex);
} catch (SchemaChangeException e) {
throw new UnsupportedOperationException(e);
}
}
});
// populate the priority queue with initial values
for (int b = 0; b < senderCount; ++b) {
// Keep fetching for this stream while its loader holds zero records; a stream that runs out
// (getNext returns null) has its loader cleared and set to null and is not queued.
while (batchLoaders[b] != null && batchLoaders[b].getRecordCount() == 0) {
try {
final RawFragmentBatch batch = getNext(b);
incomingBatches[b] = batch;
if (batch != null) {
// NOTE(review): the other load sites in this method call release() on the raw batch after
// loading; this one does not - confirm whether that is intentional.
batchLoaders[b].load(batch.getHeader().getDef(), batch.getBody());
} else {
batchLoaders[b].clear();
batchLoaders[b] = null;
if (!context.shouldContinue()) {
return IterOutcome.STOP;
}
}
} catch (IOException | SchemaChangeException e) {
context.fail(e);
return IterOutcome.STOP;
}
}
if (batchLoaders[b] != null) {
pqueue.add(new Node(b, 0));
}
}
hasRun = true;
// finished lazy initialization
}
while (!pqueue.isEmpty()) {
// pop next value from pq and copy to outgoing batch
final Node node = pqueue.peek();
// A false return is treated as "outgoing batch is full"; the node is still removed and its
// successor queued below before the loop exits at the bottom.
if (!copyRecordToOutgoingBatch(node)) {
logger.debug("Outgoing vectors space is full; breaking");
prevBatchWasFull = true;
}
pqueue.poll();
if (node.valueIndex == batchLoaders[node.batchId].getRecordCount() - 1) {
// reached the end of an incoming record batch
RawFragmentBatch nextBatch;
try {
nextBatch = getNext(node.batchId);
// skip zero-record batches
while (nextBatch != null && nextBatch.getHeader().getDef().getRecordCount() == 0) {
nextBatch = getNext(node.batchId);
}
assert nextBatch != null || inputCounts[node.batchId] == outputCounts[node.batchId] : String.format("Stream %d input count: %d output count %d", node.batchId, inputCounts[node.batchId], outputCounts[node.batchId]);
if (nextBatch == null && !context.shouldContinue()) {
return IterOutcome.STOP;
}
} catch (final IOException e) {
context.fail(e);
return IterOutcome.STOP;
}
incomingBatches[node.batchId] = nextBatch;
if (nextBatch == null) {
// batch is empty
boolean allBatchesEmpty = true;
for (final RawFragmentBatch batch : incomingBatches) {
// see if all batches are empty so we can return OK_* or NONE
if (batch != null) {
allBatchesEmpty = false;
break;
}
}
if (allBatchesEmpty) {
hasMoreIncoming = false;
break;
}
// This stream has no more batches but others do. Nothing is re-queued for it, so it is
// ignored in subsequent iterations.
if (prevBatchWasFull) {
break;
} else {
continue;
}
}
final UserBitShared.RecordBatchDef rbd = incomingBatches[node.batchId].getHeader().getDef();
try {
batchLoaders[node.batchId].load(rbd, incomingBatches[node.batchId].getBody());
// TODO: Clean: DRILL-2933: That load(...) no longer throws
// SchemaChangeException, so check/clean catch clause below.
} catch (final SchemaChangeException ex) {
context.fail(ex);
return IterOutcome.STOP;
}
incomingBatches[node.batchId].release();
batchOffsets[node.batchId] = 0;
// add front value from batch[x] to priority queue
if (batchLoaders[node.batchId].getRecordCount() != 0) {
pqueue.add(new Node(node.batchId, 0));
}
} else {
// more records remain in the current batch of this stream: queue the next one
pqueue.add(new Node(node.batchId, node.valueIndex + 1));
}
if (prevBatchWasFull) {
break;
}
}
// set the value counts in the outgoing vectors
for (final VectorWrapper<?> vw : outgoingContainer) {
vw.getValueVector().getMutator().setValueCount(outgoingPosition);
}
// Nothing is left to merge; the batch filled above is still returned from this call.
if (pqueue.isEmpty()) {
state = BatchState.DONE;
}
if (schemaChanged) {
return IterOutcome.OK_NEW_SCHEMA;
} else {
return IterOutcome.OK;
}
}
use of org.apache.drill.exec.record.RecordBatchLoader in project drill by apache.
the class TestOptiqPlans method testLogicalJsonScan.
/**
 * Runs the logical plan in {@code logical_json_scan.json} against a local Drillbit and prints,
 * for every returned batch, each vector's field path followed by its values. The test makes no
 * assertions; it passes as long as running the query and reading the results does not throw.
 *
 * @throws Exception if starting the Drillbit, running the query, or loading a batch fails
 */
@Test
public void testLogicalJsonScan() throws Exception {
  final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
  // try-with-resources closes the client and then the Drillbit, so no explicit close() is needed.
  try (final Drillbit bit1 = new Drillbit(config, serviceSet);
      final DrillClient client = new DrillClient(config, serviceSet.getCoordinator())) {
    bit1.run();
    client.connect();
    final List<QueryDataBatch> results = client.runQuery(org.apache.drill.exec.proto.UserBitShared.QueryType.LOGICAL, Resources.toString(Resources.getResource("logical_json_scan.json"), Charsets.UTF_8));
    final RecordBatchLoader loader = new RecordBatchLoader(bit1.getContext().getAllocator());
    for (final QueryDataBatch b : results) {
      System.out.println(String.format("Got %d results", b.getHeader().getRowCount()));
      loader.load(b.getHeader().getDef(), b.getData());
      for (final VectorWrapper<?> vw : loader) {
        System.out.println(vw.getValueVector().getField().getPath());
        final ValueVector vv = vw.getValueVector();
        for (int i = 0; i < vv.getAccessor().getValueCount(); i++) {
          if (vv instanceof VarBinaryVector) {
            // For binary columns also print the start..end offsets of the value.
            final VarBinaryVector.Accessor x = ((VarBinaryVector) vv).getAccessor();
            final VarBinaryHolder vbh = new VarBinaryHolder();
            x.get(i, vbh);
            System.out.printf("%d..%d", vbh.start, vbh.end);
            // Decode with an explicit charset rather than the platform default, so the printed
            // text does not vary between machines.
            System.out.println("[" + new String((byte[]) vv.getAccessor().getObject(i), Charsets.UTF_8) + "]");
          } else {
            System.out.println(vv.getAccessor().getObject(i));
          }
        }
      }
      // Clear the loader and release the batch before moving on to the next one.
      loader.clear();
      b.release();
    }
  }
}
use of org.apache.drill.exec.record.RecordBatchLoader in project drill by apache.
the class TestSimpleFragmentRun method runJSONScanPopFragment.
/**
 * Runs the physical plan in {@code /physical_json_scan_test1.json} (with its
 * {@code #{TEST_FILE}} placeholder pointed at {@code /scan_json_test_1.json}) on a local Drillbit.
 * Only the first returned batch is expected to carry data; its schema and rows are printed
 * tab-separated, and the total number of rows must be 2.
 *
 * @throws Exception if starting the Drillbit, running the query, or loading a batch fails
 */
@Test
public void runJSONScanPopFragment() throws Exception {
  try (final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
      final Drillbit bit = new Drillbit(CONFIG, serviceSet);
      final DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator())) {
    // Start the Drillbit and submit the physical plan.
    bit.run();
    client.connect();
    final String planTemplate = Files.toString(FileUtils.getResourceAsFile("/physical_json_scan_test1.json"), Charsets.UTF_8);
    final String testFileUri = FileUtils.getResourceAsFile("/scan_json_test_1.json").toURI().toString();
    final List<QueryDataBatch> results = client.runQuery(QueryType.PHYSICAL, planTemplate.replace("#{TEST_FILE}", testFileUri));

    // Walk the returned batches and print what the first one contains.
    final RecordBatchLoader batchLoader = new RecordBatchLoader(RootAllocatorFactory.newRoot(CONFIG));
    int recordCount = 0;
    int batchIndex = 0;
    for (final QueryDataBatch batch : results) {
      final boolean isFirstBatch = batchIndex == 0;
      batchIndex++;
      if (!isFirstBatch) {
        // Every batch after the first must be empty; release it and move on.
        assertFalse(batch.hasData());
        batch.release();
        continue;
      }
      assertTrue(batch.hasData());
      assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData()));

      // Header line: name[type] for every column, tab-separated.
      System.out.println("\n\n========NEW SCHEMA=========\n\n");
      String headerSeparator = "";
      for (final VectorWrapper<?> v : batchLoader) {
        System.out.print(headerSeparator);
        headerSeparator = "\t";
        System.out.print(v.getField().getPath());
        System.out.print("[");
        System.out.print(v.getField().getType().getMinorType());
        System.out.print("]");
      }
      System.out.println();

      // One line per record, tab-separated values.
      final int rowsInBatch = batchLoader.getRecordCount();
      for (int r = 0; r < rowsInBatch; r++) {
        recordCount++;
        int columnsPrinted = 0;
        for (final VectorWrapper<?> v : batchLoader) {
          if (columnsPrinted > 0) {
            System.out.print("\t");
          }
          final ValueVector.Accessor accessor = v.getValueVector().getAccessor();
          System.out.print(accessor.getObject(r));
          columnsPrinted++;
        }
        // End the line only when at least one column was printed.
        if (columnsPrinted > 0) {
          System.out.println();
        }
      }
      batchLoader.clear();
      batch.release();
    }
    assertEquals(2, recordCount);
  }
}
Aggregations