Search in sources :

Example 11 with SelectionVector4

use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.

the class OrderedPartitionRecordBatch method buildTable.

private void buildTable() throws SchemaChangeException, ClassTransformationException, IOException {
    // Get all samples from distributed map
    @SuppressWarnings("resource") SortRecordBatchBuilder containerBuilder = new SortRecordBatchBuilder(context.getAllocator());
    final VectorContainer allSamplesContainer = new VectorContainer();
    final VectorContainer candidatePartitionTable = new VectorContainer();
    CachedVectorContainer wrap = null;
    try {
        for (CachedVectorContainer w : mmap.get(mapKey)) {
            containerBuilder.add(w.get());
        }
        containerBuilder.build(context, allSamplesContainer);
        List<Ordering> orderDefs = Lists.newArrayList();
        int i = 0;
        for (Ordering od : popConfig.getOrderings()) {
            SchemaPath sp = SchemaPath.getSimplePath("f" + i++);
            orderDefs.add(new Ordering(od.getDirection(), new FieldReference(sp)));
        }
        // sort the data incoming samples.
        @SuppressWarnings("resource") SelectionVector4 newSv4 = containerBuilder.getSv4();
        Sorter sorter = SortBatch.createNewSorter(context, orderDefs, allSamplesContainer);
        sorter.setup(context, newSv4, allSamplesContainer);
        sorter.sort(newSv4, allSamplesContainer);
        // Copy every Nth record from the samples into a candidate partition table, where N = totalSampledRecords/partitions
        // Attempt to push this to the distributed map. Only the first candidate to get pushed will be used.
        SampleCopier copier = null;
        List<ValueVector> localAllocationVectors = Lists.newArrayList();
        copier = getCopier(newSv4, allSamplesContainer, candidatePartitionTable, orderDefs, localAllocationVectors);
        int allocationSize = 50;
        while (true) {
            for (ValueVector vv : localAllocationVectors) {
                AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
            }
            int skipRecords = containerBuilder.getSv4().getTotalCount() / partitions;
            if (copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
                assert copier.getOutputRecords() == partitions - 1 : String.format("output records: %d partitions: %d", copier.getOutputRecords(), partitions);
                for (VectorWrapper<?> vw : candidatePartitionTable) {
                    vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
                }
                break;
            } else {
                candidatePartitionTable.zeroVectors();
                allocationSize *= 2;
            }
        }
        candidatePartitionTable.setRecordCount(copier.getOutputRecords());
        @SuppressWarnings("resource") WritableBatch batch = WritableBatch.getBatchNoHVWrap(candidatePartitionTable.getRecordCount(), candidatePartitionTable, false);
        wrap = new CachedVectorContainer(batch, context.getDrillbitContext().getAllocator());
        tableMap.putIfAbsent(mapKey + "final", wrap, 1, TimeUnit.MINUTES);
    } finally {
        candidatePartitionTable.clear();
        allSamplesContainer.clear();
        containerBuilder.clear();
        containerBuilder.close();
        if (wrap != null) {
            wrap.clear();
        }
    }
}
Also used : FieldReference(org.apache.drill.common.expression.FieldReference) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) VectorContainer(org.apache.drill.exec.record.VectorContainer) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaPath(org.apache.drill.common.expression.SchemaPath) Ordering(org.apache.drill.common.logical.data.Order.Ordering) Sorter(org.apache.drill.exec.physical.impl.sort.Sorter) WritableBatch(org.apache.drill.exec.record.WritableBatch) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 12 with SelectionVector4

use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.

the class DrillTestWrapper method addToCombinedVectorResults.

/**
   * Add to result vectors and compare batch schema against expected schema while iterating batches.
   * @param batches
   * @param  expectedSchema: the expected schema the batches should contain. Through SchemaChangeException
   *                       if encounter different batch schema.
   * @return
   * @throws SchemaChangeException
   * @throws UnsupportedEncodingException
   */
public static Map<String, List<Object>> addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema) throws SchemaChangeException, UnsupportedEncodingException {
    // TODO - this does not handle schema changes
    Map<String, List<Object>> combinedVectors = new TreeMap<>();
    long totalRecords = 0;
    BatchSchema schema = null;
    for (VectorAccessible loader : batches) {
        if (expectedSchema != null) {
            if (!expectedSchema.equals(loader.getSchema())) {
                throw new SchemaChangeException(String.format("Batch schema does not match expected schema\n" + "Actual schema: %s.  Expected schema : %s", loader.getSchema(), expectedSchema));
            }
        }
        // SchemaChangeException, so check/clean throws clause above.
        if (schema == null) {
            schema = loader.getSchema();
            for (MaterializedField mf : schema) {
                combinedVectors.put(SchemaPath.getSimplePath(mf.getPath()).toExpr(), new ArrayList<Object>());
            }
        } else {
            // TODO - actually handle schema changes, this is just to get access to the SelectionVectorMode
            // of the current batch, the check for a null schema is used to only mutate the schema once
            // need to add new vectors and null fill for previous batches? distinction between null and non-existence important?
            schema = loader.getSchema();
        }
        logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
        totalRecords += loader.getRecordCount();
        for (VectorWrapper<?> w : loader) {
            String field = SchemaPath.getSimplePath(w.getField().getPath()).toExpr();
            ValueVector[] vectors;
            if (w.isHyper()) {
                vectors = w.getValueVectors();
            } else {
                vectors = new ValueVector[] { w.getValueVector() };
            }
            SelectionVector2 sv2 = null;
            SelectionVector4 sv4 = null;
            switch(schema.getSelectionVectorMode()) {
                case TWO_BYTE:
                    sv2 = loader.getSelectionVector2();
                    break;
                case FOUR_BYTE:
                    sv4 = loader.getSelectionVector4();
                    break;
            }
            if (sv4 != null) {
                for (int j = 0; j < sv4.getCount(); j++) {
                    int complexIndex = sv4.get(j);
                    int batchIndex = complexIndex >> 16;
                    int recordIndexInBatch = complexIndex & 65535;
                    Object obj = vectors[batchIndex].getAccessor().getObject(recordIndexInBatch);
                    if (obj != null) {
                        if (obj instanceof Text) {
                            obj = obj.toString();
                        }
                    }
                    combinedVectors.get(field).add(obj);
                }
            } else {
                for (ValueVector vv : vectors) {
                    for (int j = 0; j < loader.getRecordCount(); j++) {
                        int index;
                        if (sv2 != null) {
                            index = sv2.getIndex(j);
                        } else {
                            index = j;
                        }
                        Object obj = vv.getAccessor().getObject(index);
                        if (obj != null) {
                            if (obj instanceof Text) {
                                obj = obj.toString();
                            }
                        }
                        combinedVectors.get(field).add(obj);
                    }
                }
            }
        }
    }
    return combinedVectors;
}
Also used : VectorAccessible(org.apache.drill.exec.record.VectorAccessible) MaterializedField(org.apache.drill.exec.record.MaterializedField) Text(org.apache.drill.exec.util.Text) TreeMap(java.util.TreeMap) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) BatchSchema(org.apache.drill.exec.record.BatchSchema) SelectionVector2(org.apache.drill.exec.record.selection.SelectionVector2) ArrayList(java.util.ArrayList) List(java.util.List) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 13 with SelectionVector4

use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.

the class TopNBatch method purge.

private void purge() throws SchemaChangeException {
    Stopwatch watch = Stopwatch.createStarted();
    VectorContainer c = priorityQueue.getHyperBatch();
    VectorContainer newContainer = new VectorContainer(oContext);
    @SuppressWarnings("resource") SelectionVector4 selectionVector4 = priorityQueue.getHeapSv4();
    SimpleRecordBatch batch = new SimpleRecordBatch(c, selectionVector4, context);
    SimpleRecordBatch newBatch = new SimpleRecordBatch(newContainer, null, context);
    if (copier == null) {
        copier = RemovingRecordBatch.getGenerated4Copier(batch, context, oContext.getAllocator(), newContainer, newBatch, null);
    } else {
        for (VectorWrapper<?> i : batch) {
            @SuppressWarnings("resource") ValueVector v = TypeHelper.getNewVector(i.getField(), oContext.getAllocator());
            newContainer.add(v);
        }
        copier.setupRemover(context, batch, newBatch);
    }
    @SuppressWarnings("resource") SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
    try {
        do {
            int count = selectionVector4.getCount();
            int copiedRecords = copier.copyRecords(0, count);
            assert copiedRecords == count;
            for (VectorWrapper<?> v : newContainer) {
                ValueVector.Mutator m = v.getValueVector().getMutator();
                m.setValueCount(count);
            }
            newContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
            newContainer.setRecordCount(count);
            builder.add(newBatch);
        } while (selectionVector4.next());
        selectionVector4.clear();
        c.clear();
        VectorContainer newQueue = new VectorContainer();
        builder.canonicalize();
        builder.build(context, newQueue);
        priorityQueue.resetQueue(newQueue, builder.getSv4().createNewWrapperCurrent());
        builder.getSv4().clear();
        selectionVector4.clear();
    } finally {
        DrillAutoCloseables.closeNoChecked(builder);
    }
    logger.debug("Took {} us to purge", watch.elapsed(TimeUnit.MICROSECONDS));
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) Stopwatch(com.google.common.base.Stopwatch) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) VectorContainer(org.apache.drill.exec.record.VectorContainer) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 14 with SelectionVector4

use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.

the class PriorityQueueTemplate method resetQueue.

@Override
public void resetQueue(VectorContainer container, SelectionVector4 v4) throws SchemaChangeException {
    assert container.getSchema().getSelectionVectorMode() == BatchSchema.SelectionVectorMode.FOUR_BYTE;
    BatchSchema schema = container.getSchema();
    VectorContainer newContainer = new VectorContainer();
    for (MaterializedField field : schema) {
        int[] ids = container.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds();
        newContainer.add(container.getValueAccessorById(field.getValueClass(), ids).getValueVectors());
    }
    newContainer.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
    // Cleanup before recreating hyperbatch and sv4.
    cleanup();
    hyperBatch = new ExpandableHyperContainer(newContainer);
    batchCount = hyperBatch.iterator().next().getValueVectors().length;
    @SuppressWarnings("resource") final DrillBuf drillBuf = allocator.buffer(4 * (limit + 1));
    heapSv4 = new SelectionVector4(drillBuf, limit, Character.MAX_VALUE);
    // Reset queue size (most likely to be set to limit).
    queueSize = 0;
    for (int i = 0; i < v4.getTotalCount(); i++) {
        heapSv4.set(i, v4.get(i));
        ++queueSize;
    }
    v4.clear();
    doSetup(context, hyperBatch, null);
}
Also used : ExpandableHyperContainer(org.apache.drill.exec.record.ExpandableHyperContainer) BatchSchema(org.apache.drill.exec.record.BatchSchema) MaterializedField(org.apache.drill.exec.record.MaterializedField) VectorContainer(org.apache.drill.exec.record.VectorContainer) DrillBuf(io.netty.buffer.DrillBuf) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 15 with SelectionVector4

use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.

the class HashJoinHelper method getNewSV4.

public SelectionVector4 getNewSV4(int recordCount) throws SchemaChangeException {
    ByteBuf vector = allocator.buffer((recordCount * 4));
    SelectionVector4 sv4 = new SelectionVector4(vector, recordCount, recordCount);
    // Initialize the vector
    for (int i = 0; i < recordCount; i++) {
        sv4.set(i, INDEX_EMPTY);
    }
    return sv4;
}
Also used : ByteBuf(io.netty.buffer.ByteBuf) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Aggregations

SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4)19 DrillBuf (io.netty.buffer.DrillBuf)8 VectorContainer (org.apache.drill.exec.record.VectorContainer)6 ValueVector (org.apache.drill.exec.vector.ValueVector)6 Stopwatch (com.google.common.base.Stopwatch)4 SortRecordBatchBuilder (org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder)4 BatchSchema (org.apache.drill.exec.record.BatchSchema)3 MaterializedField (org.apache.drill.exec.record.MaterializedField)3 CachedVectorContainer (org.apache.drill.exec.cache.CachedVectorContainer)2 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)2 Sorter (org.apache.drill.exec.physical.impl.sort.Sorter)2 WritableBatch (org.apache.drill.exec.record.WritableBatch)2 ConfigException (com.typesafe.config.ConfigException)1 ByteBuf (io.netty.buffer.ByteBuf)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 TreeMap (java.util.TreeMap)1 FieldReference (org.apache.drill.common.expression.FieldReference)1 SchemaPath (org.apache.drill.common.expression.SchemaPath)1 Ordering (org.apache.drill.common.logical.data.Order.Ordering)1