Example 76 with VectorContainer

use of org.apache.drill.exec.record.VectorContainer in project drill by axbaretto.

the class SpilledRuns method doMergeAndSpill.

private BatchGroup.SpilledRun doMergeAndSpill(List<? extends BatchGroup> batchesToSpill, int spillBatchRowCount, VectorInitializer allocHelper) throws Throwable {
    // Merge the selected set of matches and write them to the
    // spill file. After each write, we release the memory associated
    // with the just-written batch.
    String outputFile = spillSet.getNextSpillFile();
    BatchGroup.SpilledRun newGroup = null;
    VectorContainer dest = new VectorContainer();
    try (AutoCloseable ignored = AutoCloseables.all(batchesToSpill);
        PriorityQueueCopierWrapper.BatchMerger merger = copierHolder.startMerge(schema, batchesToSpill, dest, spillBatchRowCount, allocHelper)) {
        newGroup = new BatchGroup.SpilledRun(spillSet, outputFile, context.getAllocator());
        logger.trace("Spilling {} batches, into spill batches of {} rows, to {}", batchesToSpill.size(), spillBatchRowCount, outputFile);
        while (merger.next()) {
            // Add a new batch of records (given by merger.getOutput()) to the spill
            // file.
            // 
            // note that addBatch also clears the merger's output container
            newGroup.addBatch(dest);
        }
        context.injectChecked(ExternalSortBatch.INTERRUPTION_WHILE_SPILLING, IOException.class);
        newGroup.closeWriter();
        logger.trace("Spilled {} output batches, each of {} bytes, {} records, to {}", merger.getBatchCount(), merger.getEstBatchSize(), spillBatchRowCount, outputFile);
        newGroup.setBatchSize(merger.getEstBatchSize());
        return newGroup;
    } catch (Throwable e) {
        // we only need to clean up newGroup if spill failed
        try {
            if (newGroup != null) {
                AutoCloseables.close(e, newGroup);
            }
        } catch (Throwable t) {
        /* close() may hit the same IO issue; just ignore */
        }
        throw e;
    }
}
Also used : SpilledRun(org.apache.drill.exec.physical.impl.xsort.managed.BatchGroup.SpilledRun), VectorContainer(org.apache.drill.exec.record.VectorContainer)
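
The key idiom above is AutoCloseables.all(batchesToSpill): every spill candidate is folded into a single AutoCloseable so that one try-with-resources releases all batch memory whether the merge succeeds or throws. Below is a minimal, self-contained sketch of that idiom; the all() helper is a simplified stand-in written for illustration, not Drill's actual AutoCloseables implementation.

import java.util.Arrays;
import java.util.List;

public class CloseAllSketch {
    // Simplified stand-in for AutoCloseables.all(): closing the returned
    // handle closes every element; later failures are attached as suppressed
    // exceptions so none are silently lost.
    static AutoCloseable all(List<? extends AutoCloseable> resources) {
        return () -> {
            Exception first = null;
            for (AutoCloseable c : resources) {
                try {
                    c.close();
                } catch (Exception e) {
                    if (first == null) {
                        first = e;
                    } else {
                        first.addSuppressed(e);
                    }
                }
            }
            if (first != null) {
                throw first;
            }
        };
    }

    public static void main(String[] args) throws Exception {
        List<AutoCloseable> batches = Arrays.asList(
            () -> System.out.println("released batch 0"),
            () -> System.out.println("released batch 1"));
        try (AutoCloseable ignored = all(batches)) {
            System.out.println("merge-and-spill work happens here");
        } // both batches are released here, on success or failure
    }
}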

Example 77 with VectorContainer

use of org.apache.drill.exec.record.VectorContainer in project drill by axbaretto.

the class ResultSetLoaderImpl method harvest.

@Override
public VectorContainer harvest() {
    int rowCount;
    switch(state) {
        case ACTIVE:
        case FULL_BATCH:
            rowCount = harvestNormalBatch();
            logger.trace("Harvesting {} rows", rowCount);
            break;
        case OVERFLOW:
            rowCount = harvestOverflowBatch();
            logger.trace("Harvesting {} rows after overflow", rowCount);
            break;
        default:
            throw new IllegalStateException("Unexpected state: " + state);
    }
    // Build the output container
    VectorContainer container = outputContainer();
    container.setRecordCount(rowCount);
    // Finalize: update counts, set state.
    harvestBatchCount++;
    previousRowCount += rowCount;
    return container;
}
Also used : VectorContainer(org.apache.drill.exec.record.VectorContainer)
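
For context, here is a hedged sketch of how a caller typically drives this harvest() call: start a batch, write rows through the RowSetLoader, then harvest the finished container. The method names follow the ResultSetLoader and RowSetLoader interfaces referenced in the Aggregations list at the end of this page; package paths vary between Drill versions, and the required INT column at index 0 is an assumption made only for illustration.

import org.apache.drill.exec.physical.resultSet.ResultSetLoader;
import org.apache.drill.exec.physical.resultSet.RowSetLoader;
import org.apache.drill.exec.record.VectorContainer;

public class HarvestSketch {
    // Sketch only: rsLoader is assumed to be constructed elsewhere (for
    // example, by an operator's setup code) with column 0 as a required INT.
    static VectorContainer loadOneBatch(ResultSetLoader rsLoader, int rows) {
        rsLoader.startBatch();                  // open an in-flight batch
        RowSetLoader writer = rsLoader.writer();
        for (int i = 0; i < rows && !writer.isFull(); i++) {
            writer.start();                     // begin a row
            writer.scalar(0).setInt(i);         // write column 0
            writer.save();                      // commit the row
        }
        // harvest() is the path shown above: it finalizes the record count
        // and hands back the output VectorContainer.
        return rsLoader.harvest();
    }
}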

Example 78 with VectorContainer

use of org.apache.drill.exec.record.VectorContainer in project drill by axbaretto.

the class TestBatchSerialization method verifySerialize.

/**
 * Verify serialize and deserialize. Need to pass both the
 * input and expected (even though the expected should be the same
 * data as the input) because the act of serializing clears the
 * input for obscure historical reasons.
 *
 * @param rowSet the row set to serialize; its vectors are cleared as a
 *               side effect of writing
 * @param expected a second copy of the same data, used to verify the
 *                 deserialized result
 * @throws IOException if the serialized file cannot be written or read
 */
private void verifySerialize(SingleRowSet rowSet, SingleRowSet expected) throws IOException {
    File dir = DirTestWatcher.createTempDir(dirTestWatcher.getDir());
    FileChannel channel = FileChannel.open(new File(dir, "serialize.dat").toPath(), StandardOpenOption.CREATE, StandardOpenOption.WRITE);
    VectorSerializer.Writer writer = VectorSerializer.writer(channel);
    VectorContainer container = rowSet.container();
    SelectionVector2 sv2 = rowSet.getSv2();
    writer.write(container, sv2);
    container.clear();
    if (sv2 != null) {
        sv2.clear();
    }
    writer.close();
    File outFile = new File(dir, "serialize.dat");
    assertTrue(outFile.exists());
    assertTrue(outFile.isFile());
    RowSet result;
    try (InputStream in = new BufferedInputStream(new FileInputStream(outFile))) {
        Reader reader = VectorSerializer.reader(fixture.allocator(), in);
        result = fixture.wrap(reader.read(), reader.sv2());
    }
    new RowSetComparison(expected).verifyAndClearAll(result);
    outFile.delete();
}
Also used : RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison), BufferedInputStream(java.io.BufferedInputStream), FileChannel(java.nio.channels.FileChannel), FileInputStream(java.io.FileInputStream), InputStream(java.io.InputStream), SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet), ExtendableRowSet(org.apache.drill.test.rowSet.RowSet.ExtendableRowSet), RowSet(org.apache.drill.test.rowSet.RowSet), SelectionVector2(org.apache.drill.exec.record.selection.SelectionVector2), Reader(org.apache.drill.exec.cache.VectorSerializer.Reader), File(java.io.File), VectorContainer(org.apache.drill.exec.record.VectorContainer)
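
A hedged sketch of a caller, assumed to sit inside the same test class so that fixture and verifySerialize are in scope: because serializing clears its input, two identical row sets are built, one to write and one to keep for comparison. The SchemaBuilder and rowSetBuilder calls follow the test-framework types named in the Aggregations list; exact signatures may differ across Drill versions.

// Sketch: build the same data twice, since serializing consumes the input.
TupleMetadata schema = new SchemaBuilder()
    .add("id", TypeProtos.MinorType.INT)
    .buildSchema();
SingleRowSet input = fixture.rowSetBuilder(schema)
    .addRow(1).addRow(2).addRow(3)
    .build();
SingleRowSet expected = fixture.rowSetBuilder(schema)
    .addRow(1).addRow(2).addRow(3)
    .build();
verifySerialize(input, expected);   // input is consumed; expected is verified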

Example 79 with VectorContainer

use of org.apache.drill.exec.record.VectorContainer in project drill by axbaretto.

the class TestWriteToDisk method test.

@Test
@SuppressWarnings("static-method")
public void test() throws Exception {
    final List<ValueVector> vectorList = Lists.newArrayList();
    final DrillConfig config = DrillConfig.create();
    try (final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
        final Drillbit bit = new Drillbit(config, serviceSet)) {
        bit.run();
        final DrillbitContext context = bit.getContext();
        final MaterializedField intField = MaterializedField.create("int", Types.required(TypeProtos.MinorType.INT));
        final MaterializedField binField = MaterializedField.create("binary", Types.required(TypeProtos.MinorType.VARBINARY));
        try (final IntVector intVector = (IntVector) TypeHelper.getNewVector(intField, context.getAllocator());
            final VarBinaryVector binVector = (VarBinaryVector) TypeHelper.getNewVector(binField, context.getAllocator())) {
            AllocationHelper.allocate(intVector, 4, 4);
            AllocationHelper.allocate(binVector, 4, 5);
            vectorList.add(intVector);
            vectorList.add(binVector);
            intVector.getMutator().setSafe(0, 0);
            binVector.getMutator().setSafe(0, "ZERO".getBytes());
            intVector.getMutator().setSafe(1, 1);
            binVector.getMutator().setSafe(1, "ONE".getBytes());
            intVector.getMutator().setSafe(2, 2);
            binVector.getMutator().setSafe(2, "TWO".getBytes());
            intVector.getMutator().setSafe(3, 3);
            binVector.getMutator().setSafe(3, "THREE".getBytes());
            intVector.getMutator().setValueCount(4);
            binVector.getMutator().setValueCount(4);
            VectorContainer container = new VectorContainer();
            container.addCollection(vectorList);
            container.setRecordCount(4);
            @SuppressWarnings("resource") WritableBatch batch = WritableBatch.getBatchNoHVWrap(container.getRecordCount(), container, false);
            VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(batch, context.getAllocator());
            final VectorAccessibleSerializable newWrap = new VectorAccessibleSerializable(context.getAllocator());
            try (final FileSystem fs = getLocalFileSystem()) {
                final File tempDir = Files.createTempDir();
                tempDir.deleteOnExit();
                final Path path = new Path(tempDir.getAbsolutePath(), "drillSerializable");
                try (final FSDataOutputStream out = fs.create(path)) {
                    wrap.writeToStream(out);
                }
                try (final FSDataInputStream in = fs.open(path)) {
                    newWrap.readFromStream(in);
                }
            }
            // Read back the deserialized vectors and dump their contents.
            // (An assertion-based variant is sketched after this example.)
            final VectorAccessible newContainer = newWrap.get();
            for (VectorWrapper<?> w : newContainer) {
                try (ValueVector vv = w.getValueVector()) {
                    int values = vv.getAccessor().getValueCount();
                    for (int i = 0; i < values; i++) {
                        final Object o = vv.getAccessor().getObject(i);
                        if (o instanceof byte[]) {
                            System.out.println(new String((byte[]) o));
                        } else {
                            System.out.println(o);
                        }
                    }
                }
            }
        }
    }
}
Also used : DrillbitContext(org.apache.drill.exec.server.DrillbitContext), Path(org.apache.hadoop.fs.Path), IntVector(org.apache.drill.exec.vector.IntVector), VectorAccessible(org.apache.drill.exec.record.VectorAccessible), MaterializedField(org.apache.drill.exec.record.MaterializedField), VarBinaryVector(org.apache.drill.exec.vector.VarBinaryVector), VectorContainer(org.apache.drill.exec.record.VectorContainer), ValueVector(org.apache.drill.exec.vector.ValueVector), DrillConfig(org.apache.drill.common.config.DrillConfig), Drillbit(org.apache.drill.exec.server.Drillbit), RemoteServiceSet(org.apache.drill.exec.server.RemoteServiceSet), FileSystem(org.apache.hadoop.fs.FileSystem), WritableBatch(org.apache.drill.exec.record.WritableBatch), FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream), FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream), File(java.io.File), ExecTest(org.apache.drill.exec.ExecTest), Test(org.junit.Test)
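
The loop above only prints the round-tripped values. Below is a hedged, assertion-based variant of that final loop; it assumes the same newContainer and vector order as the test (int column first, then varbinary) and a static import of org.junit.Assert.assertEquals.

String[] expectedStrings = {"ZERO", "ONE", "TWO", "THREE"};
int col = 0;
for (VectorWrapper<?> w : newContainer) {
    try (ValueVector vv = w.getValueVector()) {
        assertEquals(4, vv.getAccessor().getValueCount());
        for (int i = 0; i < 4; i++) {
            final Object o = vv.getAccessor().getObject(i);
            if (col == 0) {
                // int column: values were written as 0..3
                assertEquals(i, ((Integer) o).intValue());
            } else {
                // varbinary column: compare the bytes as text
                assertEquals(expectedStrings[i], new String((byte[]) o));
            }
        }
    }
    col++;
}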

Example 80 with VectorContainer

use of org.apache.drill.exec.record.VectorContainer in project drill by axbaretto.

the class TopNBatchTest method priorityQueueOrderingTest.

/**
 * Priority queue unit test.
 * @throws Exception
 */
@Test
public void priorityQueueOrderingTest() throws Exception {
    Properties properties = new Properties();
    DrillConfig drillConfig = DrillConfig.create(properties);
    FieldReference expr = FieldReference.getWithQuotedRef("colA");
    Order.Ordering ordering = new Order.Ordering(Order.Ordering.ORDER_DESC, expr, Order.Ordering.NULLS_FIRST);
    List<Order.Ordering> orderings = Lists.newArrayList(ordering);
    MaterializedField colA = MaterializedField.create("colA", Types.required(TypeProtos.MinorType.INT));
    MaterializedField colB = MaterializedField.create("colB", Types.required(TypeProtos.MinorType.INT));
    List<MaterializedField> cols = Lists.newArrayList(colA, colB);
    BatchSchema batchSchema = new BatchSchema(BatchSchema.SelectionVectorMode.NONE, cols);
    RowSet expectedRowSet;
    try (RootAllocator allocator = new RootAllocator(100_000_000)) {
        expectedRowSet = new RowSetBuilder(allocator, batchSchema).addRow(110, 10).addRow(109, 9).addRow(108, 8).addRow(107, 7).addRow(106, 6).addRow(105, 5).addRow(104, 4).addRow(103, 3).addRow(102, 2).addRow(101, 1).build();
        PriorityQueue queue;
        ExpandableHyperContainer hyperContainer;
        {
            // Build an empty container that carries only the schema; the
            // generated priority queue is compiled against this schema
            // before any data batches arrive.
            VectorContainer container = new RowSetBuilder(allocator, batchSchema).build().container();
            hyperContainer = new ExpandableHyperContainer(container);
            queue = TopNBatch.createNewPriorityQueue(TopNBatch.createMainMappingSet(), TopNBatch.createLeftMappingSet(), TopNBatch.createRightMappingSet(), optionManager, new FunctionImplementationRegistry(drillConfig), new CodeCompiler(drillConfig, optionManager), orderings, hyperContainer, false, true, 10, allocator, batchSchema.getSelectionVectorMode());
        }
        // Nothing is ever added to this list in this test, so the cleanup
        // loop in the finally block below is effectively a no-op.
        List<RecordBatchData> testBatches = Lists.newArrayList();
        try {
            final Random random = new Random();
            final int bound = 100;
            final int numBatches = 11;
            final int numRecordsPerBatch = 100;
            for (int batchCounter = 0; batchCounter < numBatches; batchCounter++) {
                RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, batchSchema);
                rowSetBuilder.addRow((batchCounter + bound), batchCounter);
                for (int recordCounter = 0; recordCounter < numRecordsPerBatch; recordCounter++) {
                    rowSetBuilder.addRow(random.nextInt(bound), random.nextInt(bound));
                }
                VectorContainer vectorContainer = rowSetBuilder.build().container();
                queue.add(new RecordBatchData(vectorContainer, allocator));
            }
            queue.generate();
            VectorContainer resultContainer = queue.getHyperBatch();
            resultContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
            RowSet.HyperRowSet actualHyperSet = HyperRowSetImpl.fromContainer(resultContainer, queue.getFinalSv4());
            new RowSetComparison(expectedRowSet).verify(actualHyperSet);
        } finally {
            if (expectedRowSet != null) {
                expectedRowSet.clear();
            }
            queue.cleanup();
            hyperContainer.clear();
            for (RecordBatchData testBatch : testBatches) {
                testBatch.clear();
            }
        }
    }
}
Also used : Order(org.apache.drill.common.logical.data.Order), ExpandableHyperContainer(org.apache.drill.exec.record.ExpandableHyperContainer), FieldReference(org.apache.drill.common.expression.FieldReference), RecordBatchData(org.apache.drill.exec.physical.impl.sort.RecordBatchData), RowSet(org.apache.drill.test.rowSet.RowSet), MaterializedField(org.apache.drill.exec.record.MaterializedField), Properties(java.util.Properties), VectorContainer(org.apache.drill.exec.record.VectorContainer), RowSetBuilder(org.apache.drill.test.rowSet.RowSetBuilder), RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison), RootAllocator(org.apache.drill.exec.memory.RootAllocator), DrillConfig(org.apache.drill.common.config.DrillConfig), Random(java.util.Random), BatchSchema(org.apache.drill.exec.record.BatchSchema), CodeCompiler(org.apache.drill.exec.compile.CodeCompiler), FunctionImplementationRegistry(org.apache.drill.exec.expr.fn.FunctionImplementationRegistry), OperatorTest(org.apache.drill.categories.OperatorTest), Test(org.junit.Test)
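
The ordering check above works through an SV4 indirection: HyperRowSetImpl.fromContainer pairs the hyper batch with queue.getFinalSv4(), and each 32-bit SV4 entry addresses a row without copying it. Below is a minimal sketch of that encoding as I understand Drill's SelectionVector4; the encode/decode helpers are illustrative, not Drill API.

public class Sv4Sketch {
    // Each SV4 entry packs (batch index << 16) | row index, so a sorted
    // "view" can point into many batches without moving any row data.
    static int encode(int batchIndex, int rowIndex) {
        return (batchIndex << 16) | rowIndex;
    }

    static int batchIndex(int entry) {
        return entry >>> 16;      // upper 16 bits: which batch in the hyper set
    }

    static int rowIndex(int entry) {
        return entry & 0xFFFF;    // lower 16 bits: row within that batch
    }

    public static void main(String[] args) {
        int entry = encode(3, 42);
        System.out.println(batchIndex(entry) + " " + rowIndex(entry)); // 3 42
    }
}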

Aggregations

VectorContainer (org.apache.drill.exec.record.VectorContainer): 178
Test (org.junit.Test): 75
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 63
SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder): 62
SubOperatorTest (org.apache.drill.test.SubOperatorTest): 60
ValueVector (org.apache.drill.exec.vector.ValueVector): 44
RowSetTest (org.apache.drill.categories.RowSetTest): 41
RowSet (org.apache.drill.exec.physical.rowSet.RowSet): 39
SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet): 31
BatchSchema (org.apache.drill.exec.record.BatchSchema): 27
ArrayList (java.util.ArrayList): 23
MaterializedField (org.apache.drill.exec.record.MaterializedField): 23
ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader): 18
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 17
RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison): 16
UserException (org.apache.drill.common.exceptions.UserException): 15
RowSetLoader (org.apache.drill.exec.physical.resultSet.RowSetLoader): 15
SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4): 15
OperatorTest (org.apache.drill.categories.OperatorTest): 14
MockRecordBatch (org.apache.drill.exec.physical.impl.MockRecordBatch): 14