Use of org.apache.drill.exec.record.VectorContainer in project drill by axbaretto.
The class SpilledRuns, method doMergeAndSpill:
private BatchGroup.SpilledRun doMergeAndSpill(List<? extends BatchGroup> batchesToSpill,
    int spillBatchRowCount, VectorInitializer allocHelper) throws Throwable {
  // Merge the selected set of matches and write them to the
  // spill file. After each write, we release the memory associated
  // with the just-written batch.
  String outputFile = spillSet.getNextSpillFile();
  BatchGroup.SpilledRun newGroup = null;
  VectorContainer dest = new VectorContainer();
  try (AutoCloseable ignored = AutoCloseables.all(batchesToSpill);
       PriorityQueueCopierWrapper.BatchMerger merger =
           copierHolder.startMerge(schema, batchesToSpill, dest, spillBatchRowCount, allocHelper)) {
    newGroup = new BatchGroup.SpilledRun(spillSet, outputFile, context.getAllocator());
    logger.trace("Spilling {} batches, into spill batches of {} rows, to {}",
        batchesToSpill.size(), spillBatchRowCount, outputFile);
    while (merger.next()) {
      // Add a new batch of records (given by merger.getOutput()) to the spill
      // file.
      //
      // Note that addBatch also clears the merger's output container.
      newGroup.addBatch(dest);
    }
    context.injectChecked(ExternalSortBatch.INTERRUPTION_WHILE_SPILLING, IOException.class);
    newGroup.closeWriter();
    logger.trace("Spilled {} output batches, each of {} bytes, {} records, to {}",
        merger.getBatchCount(), merger.getEstBatchSize(), spillBatchRowCount, outputFile);
    newGroup.setBatchSize(merger.getEstBatchSize());
    return newGroup;
  } catch (Throwable e) {
    // We only need to clean up newGroup if the spill failed.
    try {
      if (newGroup != null) {
        AutoCloseables.close(e, newGroup);
      }
    } catch (Throwable t) {
      // close() may hit the same I/O issue; just ignore it.
    }
    throw e;
  }
}
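The catch block shows a cleanup idiom worth noting: when the spill fails partway through, the partially written run must be closed without masking the original error, which is what AutoCloseables.close(e, newGroup) accomplishes by attaching any secondary failure to the primary throwable as a suppressed exception. A minimal, self-contained sketch of the same pattern in plain Java (the Run class and messages are hypothetical, for illustration only):

import java.io.IOException;

public class CleanupOnFailure {

  // A hypothetical resource whose close() may itself fail.
  static class Run implements AutoCloseable {
    @Override
    public void close() throws IOException {
      throw new IOException("close failed too");
    }
  }

  static void mergeAndSpill() throws Exception {
    Run run = null;
    try {
      run = new Run();
      throw new IOException("disk full while spilling"); // simulated failure
    } catch (Exception e) {
      // Close the partially written resource, but attach any secondary
      // close() failure to the primary error rather than letting it
      // replace the original exception.
      try {
        if (run != null) {
          run.close();
        }
      } catch (Exception t) {
        e.addSuppressed(t);
      }
      throw e;
    }
  }

  public static void main(String[] args) {
    try {
      mergeAndSpill();
    } catch (Exception e) {
      System.out.println("primary: " + e.getMessage());      // disk full while spilling
      for (Throwable s : e.getSuppressed()) {
        System.out.println("suppressed: " + s.getMessage()); // close failed too
      }
    }
  }
}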
Use of org.apache.drill.exec.record.VectorContainer in project drill by axbaretto.
The class ResultSetLoaderImpl, method harvest:
@Override
public VectorContainer harvest() {
  int rowCount;
  switch (state) {
  case ACTIVE:
  case FULL_BATCH:
    rowCount = harvestNormalBatch();
    logger.trace("Harvesting {} rows", rowCount);
    break;
  case OVERFLOW:
    rowCount = harvestOverflowBatch();
    logger.trace("Harvesting {} rows after overflow", rowCount);
    break;
  default:
    throw new IllegalStateException("Unexpected state: " + state);
  }

  // Build the output container.
  VectorContainer container = outputContainer();
  container.setRecordCount(rowCount);

  // Finalize: update counts, set state.
  harvestBatchCount++;
  previousRowCount += rowCount;
  return container;
}
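The switch reflects the loader's batch lifecycle: a batch ends either normally (ACTIVE, or FULL_BATCH when the row limit is hit) or via OVERFLOW, where a row exceeded the vector memory budget mid-write and was carried over to a look-ahead batch, so it must be excluded from the current batch's count. A toy model of that dispatch in plain Java; this sketches the idea only and is not Drill's actual accounting:

public class HarvestStateSketch {

  // Toy model of the loader states that harvest() dispatches on.
  enum State { ACTIVE, FULL_BATCH, OVERFLOW, HARVESTED }

  static int harvest(State state, int pendingRows, int overflowRows) {
    switch (state) {
    case ACTIVE:
    case FULL_BATCH:
      // Normal end of batch: every row written so far ships downstream.
      return pendingRows;
    case OVERFLOW:
      // A row hit the memory limit mid-write and was moved to a
      // look-ahead batch; it is excluded from this batch's count.
      return pendingRows - overflowRows;
    default:
      throw new IllegalStateException("Unexpected state: " + state);
    }
  }

  public static void main(String[] args) {
    System.out.println(harvest(State.ACTIVE, 100, 0));   // 100
    System.out.println(harvest(State.OVERFLOW, 100, 1)); // 99
  }
}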
Use of org.apache.drill.exec.record.VectorContainer in project drill by axbaretto.
The class TestBatchSerialization, method verifySerialize:
/**
 * Verify a serialize/deserialize round trip. Both the input and the
 * expected row set must be passed (even though the expected should be
 * the same data as the input) because the act of serializing clears
 * the input for obscure historical reasons.
 *
 * @param rowSet the row set to serialize; cleared by this method
 * @param expected the expected contents of the deserialized row set
 * @throws IOException
 */
private void verifySerialize(SingleRowSet rowSet, SingleRowSet expected) throws IOException {
  File dir = DirTestWatcher.createTempDir(dirTestWatcher.getDir());
  FileChannel channel = FileChannel.open(new File(dir, "serialize.dat").toPath(),
      StandardOpenOption.CREATE, StandardOpenOption.WRITE);
  VectorSerializer.Writer writer = VectorSerializer.writer(channel);
  VectorContainer container = rowSet.container();
  SelectionVector2 sv2 = rowSet.getSv2();
  writer.write(container, sv2);
  container.clear();
  if (sv2 != null) {
    sv2.clear();
  }
  writer.close();

  File outFile = new File(dir, "serialize.dat");
  assertTrue(outFile.exists());
  assertTrue(outFile.isFile());

  RowSet result;
  try (InputStream in = new BufferedInputStream(new FileInputStream(outFile))) {
    Reader reader = VectorSerializer.reader(fixture.allocator(), in);
    result = fixture.wrap(reader.read(), reader.sv2());
  }
  new RowSetComparison(expected).verifyAndClearAll(result);
  outFile.delete();
}
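A caller has to build the same data twice, passing one copy as the input and one as the expected result, precisely because serialization clears the input. A hedged sketch of such a caller, using the RowSetBuilder pattern that appears in the later examples on this page (the schema, column names, and SchemaBuilder usage here are assumptions, not taken from the original test):

// Hypothetical caller in the same test class: build identical row sets
// twice because verifySerialize consumes (clears) its first argument.
private void testIntVarcharRoundTrip() throws IOException {
  BatchSchema schema = new SchemaBuilder()           // assumed test helper
      .add("a", TypeProtos.MinorType.INT)
      .add("b", TypeProtos.MinorType.VARCHAR)
      .build();
  SingleRowSet input = new RowSetBuilder(fixture.allocator(), schema)
      .addRow(10, "fred")
      .addRow(20, "wilma")
      .build();
  SingleRowSet expected = new RowSetBuilder(fixture.allocator(), schema)
      .addRow(10, "fred")
      .addRow(20, "wilma")
      .build();
  verifySerialize(input, expected);
}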
Use of org.apache.drill.exec.record.VectorContainer in project drill by axbaretto.
The class TestWriteToDisk, method test:
@Test
@SuppressWarnings("static-method")
public void test() throws Exception {
  final List<ValueVector> vectorList = Lists.newArrayList();
  final DrillConfig config = DrillConfig.create();
  try (final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
       final Drillbit bit = new Drillbit(config, serviceSet)) {
    bit.run();
    final DrillbitContext context = bit.getContext();
    final MaterializedField intField =
        MaterializedField.create("int", Types.required(TypeProtos.MinorType.INT));
    final MaterializedField binField =
        MaterializedField.create("binary", Types.required(TypeProtos.MinorType.VARBINARY));
    try (final IntVector intVector = (IntVector) TypeHelper.getNewVector(intField, context.getAllocator());
         final VarBinaryVector binVector = (VarBinaryVector) TypeHelper.getNewVector(binField, context.getAllocator())) {
      AllocationHelper.allocate(intVector, 4, 4);
      AllocationHelper.allocate(binVector, 4, 5);
      vectorList.add(intVector);
      vectorList.add(binVector);

      intVector.getMutator().setSafe(0, 0);
      binVector.getMutator().setSafe(0, "ZERO".getBytes());
      intVector.getMutator().setSafe(1, 1);
      binVector.getMutator().setSafe(1, "ONE".getBytes());
      intVector.getMutator().setSafe(2, 2);
      binVector.getMutator().setSafe(2, "TWO".getBytes());
      intVector.getMutator().setSafe(3, 3);
      binVector.getMutator().setSafe(3, "THREE".getBytes());
      intVector.getMutator().setValueCount(4);
      binVector.getMutator().setValueCount(4);

      VectorContainer container = new VectorContainer();
      container.addCollection(vectorList);
      container.setRecordCount(4);

      @SuppressWarnings("resource")
      WritableBatch batch = WritableBatch.getBatchNoHVWrap(container.getRecordCount(), container, false);
      VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(batch, context.getAllocator());
      final VectorAccessibleSerializable newWrap = new VectorAccessibleSerializable(context.getAllocator());

      try (final FileSystem fs = getLocalFileSystem()) {
        final File tempDir = Files.createTempDir();
        tempDir.deleteOnExit();
        final Path path = new Path(tempDir.getAbsolutePath(), "drillSerializable");
        try (final FSDataOutputStream out = fs.create(path)) {
          wrap.writeToStream(out);
        }
        try (final FSDataInputStream in = fs.open(path)) {
          newWrap.readFromStream(in);
        }
      }

      final VectorAccessible newContainer = newWrap.get();
      for (VectorWrapper<?> w : newContainer) {
        try (ValueVector vv = w.getValueVector()) {
          int values = vv.getAccessor().getValueCount();
          for (int i = 0; i < values; i++) {
            final Object o = vv.getAccessor().getObject(i);
            if (o instanceof byte[]) {
              System.out.println(new String((byte[]) o));
            } else {
              System.out.println(o);
            }
          }
        }
      }
    }
  }
}
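As written, the test only prints the deserialized values. To make the round trip self-checking, the final loop could assert against the original data instead; a hypothetical variant of that loop, assuming JUnit's assertEquals is available as in the other tests on this page:

// Hypothetical self-checking replacement for the verification loop above.
final String[] expectedStrings = {"ZERO", "ONE", "TWO", "THREE"};
final VectorAccessible newContainer = newWrap.get();
for (VectorWrapper<?> w : newContainer) {
  try (ValueVector vv = w.getValueVector()) {
    int values = vv.getAccessor().getValueCount();
    assertEquals(4, values);
    for (int i = 0; i < values; i++) {
      final Object o = vv.getAccessor().getObject(i);
      if (o instanceof byte[]) {
        assertEquals(expectedStrings[i], new String((byte[]) o)); // VARBINARY column
      } else {
        assertEquals(i, ((Integer) o).intValue());                // INT column
      }
    }
  }
}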
Use of org.apache.drill.exec.record.VectorContainer in project drill by axbaretto.
The class TopNBatchTest, method priorityQueueOrderingTest:
/**
* Priority queue unit test.
* @throws Exception
*/
@Test
public void priorityQueueOrderingTest() throws Exception {
  Properties properties = new Properties();
  DrillConfig drillConfig = DrillConfig.create(properties);

  FieldReference expr = FieldReference.getWithQuotedRef("colA");
  Order.Ordering ordering = new Order.Ordering(Order.Ordering.ORDER_DESC, expr, Order.Ordering.NULLS_FIRST);
  List<Order.Ordering> orderings = Lists.newArrayList(ordering);

  MaterializedField colA = MaterializedField.create("colA", Types.required(TypeProtos.MinorType.INT));
  MaterializedField colB = MaterializedField.create("colB", Types.required(TypeProtos.MinorType.INT));
  List<MaterializedField> cols = Lists.newArrayList(colA, colB);
  BatchSchema batchSchema = new BatchSchema(BatchSchema.SelectionVectorMode.NONE, cols);

  RowSet expectedRowSet;
  try (RootAllocator allocator = new RootAllocator(100_000_000)) {
    expectedRowSet = new RowSetBuilder(allocator, batchSchema)
        .addRow(110, 10)
        .addRow(109, 9)
        .addRow(108, 8)
        .addRow(107, 7)
        .addRow(106, 6)
        .addRow(105, 5)
        .addRow(104, 4)
        .addRow(103, 3)
        .addRow(102, 2)
        .addRow(101, 1)
        .build();

    PriorityQueue queue;
    ExpandableHyperContainer hyperContainer;
    {
      VectorContainer container = new RowSetBuilder(allocator, batchSchema).build().container();
      hyperContainer = new ExpandableHyperContainer(container);
      queue = TopNBatch.createNewPriorityQueue(
          TopNBatch.createMainMappingSet(), TopNBatch.createLeftMappingSet(),
          TopNBatch.createRightMappingSet(), optionManager,
          new FunctionImplementationRegistry(drillConfig),
          new CodeCompiler(drillConfig, optionManager),
          orderings, hyperContainer, false, true, 10, allocator,
          batchSchema.getSelectionVectorMode());
    }

    List<RecordBatchData> testBatches = Lists.newArrayList();
    try {
      final Random random = new Random();
      final int bound = 100;
      final int numBatches = 11;
      final int numRecordsPerBatch = 100;

      for (int batchCounter = 0; batchCounter < numBatches; batchCounter++) {
        RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, batchSchema);
        rowSetBuilder.addRow((batchCounter + bound), batchCounter);
        for (int recordCounter = 0; recordCounter < numRecordsPerBatch; recordCounter++) {
          rowSetBuilder.addRow(random.nextInt(bound), random.nextInt(bound));
        }
        VectorContainer vectorContainer = rowSetBuilder.build().container();
        queue.add(new RecordBatchData(vectorContainer, allocator));
      }

      queue.generate();
      VectorContainer resultContainer = queue.getHyperBatch();
      resultContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      RowSet.HyperRowSet actualHyperSet = HyperRowSetImpl.fromContainer(resultContainer, queue.getFinalSv4());
      new RowSetComparison(expectedRowSet).verify(actualHyperSet);
    } finally {
      if (expectedRowSet != null) {
        expectedRowSet.clear();
      }
      queue.cleanup();
      hyperContainer.clear();
      for (RecordBatchData testBatch : testBatches) {
        testBatch.clear();
      }
    }
  }
}
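Conceptually, the generated PriorityQueue retains only the top N records across all incoming batches: eleven seeded values 100-110, plus roughly 1,100 random values below 100, reduce to exactly the ten expected rows 110..101. A pure-Java analogue of that top-N selection (this is not Drill's generated code, just the underlying idea):

import java.util.PriorityQueue;
import java.util.Random;

public class TopNSketch {

  public static void main(String[] args) {
    final int n = 10;
    // A min-heap bounded at n: the smallest of the current top n sits on
    // top, so any new value larger than it displaces it.
    PriorityQueue<Integer> topN = new PriorityQueue<>();
    Random random = new Random();

    // Mirror the test: one guaranteed-large value per "batch"
    // (batchCounter + bound), plus random values below the bound.
    for (int batch = 0; batch < 11; batch++) {
      offer(topN, n, batch + 100);
      for (int i = 0; i < 100; i++) {
        offer(topN, n, random.nextInt(100));
      }
    }
    // Of the seeded values 100..110, only the top ten survive, so this
    // prints 101..110 in ascending order, matching the expected row set.
    topN.stream().sorted().forEach(System.out::println);
  }

  static void offer(PriorityQueue<Integer> topN, int n, int value) {
    topN.offer(value);
    if (topN.size() > n) {
      topN.poll(); // evict the smallest; only the top n survive
    }
  }
}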