Example 1 with SpillSet

Use of org.apache.drill.exec.physical.impl.spill.SpillSet in project drill by apache.

Class HashPartitionTest, method noSpillBuildSideTest.

@Test
public void noSpillBuildSideTest() throws Exception {
    new HashPartitionFixture().run(new HashPartitionTestCase() {

        private RowSet buildRowSet;

        private RowSet probeRowSet;

        @Override
        public CloseableRecordBatch createBuildBatch(BatchSchema schema, FragmentContext context) {
            buildRowSet = new RowSetBuilder(context.getAllocator(), schema).addRow(1, "green").addRow(3, "red").addRow(2, "blue").build();
            return new MockRecordBatch.Builder().sendData(buildRowSet).build(context);
        }

        @Override
        public void createResultBuildBatch(BatchSchema schema, FragmentContext context) {
        }

        @Override
        public CloseableRecordBatch createProbeBatch(BatchSchema schema, FragmentContext context) {
            probeRowSet = new RowSetBuilder(context.getAllocator(), schema).addRow(.5f, "yellow").addRow(1.5f, "blue").addRow(2.5f, "black").build();
            return new MockRecordBatch.Builder().sendData(probeRowSet).build(context);
        }

        @Override
        public void run(SpillSet spillSet, BatchSchema buildSchema, BatchSchema probeSchema, RecordBatch buildBatch, RecordBatch probeBatch, ChainedHashTable baseHashTable, FragmentContext context, OperatorContext operatorContext) throws Exception {
            final HashPartition hashPartition = new HashPartition(context, context.getAllocator(), baseHashTable,
                buildBatch, probeBatch, false, 10, spillSet, 0, 0,
                2); // only '1' has a special treatment
            final HashJoinMemoryCalculator.BuildSidePartitioning noopCalc = new HashJoinMemoryCalculatorImpl.NoopBuildSidePartitioningImpl();
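            // The build rows are appended with explicit hash codes 10, 11, and 12;
            // below, only the probe for "blue" with hash code 12 finds a match (build row 2).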
            hashPartition.appendInnerRow(buildBatch.getContainer(), 0, 10, noopCalc);
            hashPartition.appendInnerRow(buildBatch.getContainer(), 1, 11, noopCalc);
            hashPartition.appendInnerRow(buildBatch.getContainer(), 2, 12, noopCalc);
            hashPartition.completeAnInnerBatch(false, false);
            hashPartition.buildContainersHashTableAndHelper();
            {
                int compositeIndex = hashPartition.probeForKey(0, 16);
                Assert.assertEquals(-1, compositeIndex);
            }
            {
                int compositeIndex = hashPartition.probeForKey(1, 12);
                int startIndex = hashPartition.getStartIndex(compositeIndex).getLeft();
                int nextIndex = hashPartition.getNextIndex(startIndex);
                Assert.assertEquals(2, startIndex);
                Assert.assertEquals(-1, nextIndex);
            }
            {
                int compositeIndex = hashPartition.probeForKey(2, 15);
                Assert.assertEquals(-1, compositeIndex);
            }
            buildRowSet.clear();
            probeRowSet.clear();
            hashPartition.close();
        }
    });
}
Also used: HashJoinMemoryCalculatorImpl(org.apache.drill.exec.physical.impl.join.HashJoinMemoryCalculatorImpl) FragmentContext(org.apache.drill.exec.ops.FragmentContext) MockRecordBatch(org.apache.drill.exec.physical.impl.MockRecordBatch) SpilledRecordBatch(org.apache.drill.exec.physical.impl.aggregate.SpilledRecordBatch) RecordBatch(org.apache.drill.exec.record.RecordBatch) CloseableRecordBatch(org.apache.drill.exec.record.CloseableRecordBatch) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) RowSetBuilder(org.apache.drill.exec.physical.rowSet.RowSetBuilder) BatchSchema(org.apache.drill.exec.record.BatchSchema) OperatorContext(org.apache.drill.exec.ops.OperatorContext) SpillSet(org.apache.drill.exec.physical.impl.spill.SpillSet) BaseTest(org.apache.drill.test.BaseTest) Test(org.junit.Test)

Example 2 with SpillSet

Use of org.apache.drill.exec.physical.impl.spill.SpillSet in project drill by apache.

Class HashAggTemplate, method setup.

@Override
public void setup(HashAggregate hashAggrConfig, HashTableConfig htConfig, FragmentContext context, OperatorContext oContext, RecordBatch incoming, HashAggBatch outgoing, LogicalExpression[] valueExprs, List<TypedFieldId> valueFieldIds, ClassGenerator<?> cg, TypedFieldId[] groupByOutFieldIds, VectorContainer outContainer, int extraRowBytes) {
    if (valueExprs == null || valueFieldIds == null) {
        throw new IllegalArgumentException("Invalid aggr value exprs or workspace variables.");
    }
    if (valueFieldIds.size() < valueExprs.length) {
        throw new IllegalArgumentException("Wrong number of workspace variables.");
    }
    this.context = context;
    this.stats = oContext.getStats();
    this.allocator = oContext.getAllocator();
    this.updater = new HashAggUpdater(allocator);
    this.oContext = oContext;
    this.incoming = incoming;
    this.outgoing = outgoing;
    this.cg = cg;
    this.outContainer = outContainer;
    this.useMemoryPrediction = context.getOptions().getOption(ExecConstants.HASHAGG_USE_MEMORY_PREDICTION_VALIDATOR);
    this.phase = hashAggrConfig.getAggPhase();
    // a single-phase aggregation cannot spill
    canSpill = phase.hasTwo();
    // Typically used for testing: force a spill once a partition accumulates more than this many batches
    minBatchesPerPartition = context.getOptions().getOption(ExecConstants.HASHAGG_MIN_BATCHES_PER_PARTITION_VALIDATOR);
    // Set the memory limit
    long memoryLimit = allocator.getLimit();
    // Optional configured memory limit, typically used only for testing.
    long configLimit = context.getOptions().getOption(ExecConstants.HASHAGG_MAX_MEMORY_VALIDATOR);
    if (configLimit > 0) {
        logger.warn("Memory limit was changed to {}", configLimit);
        memoryLimit = Math.min(memoryLimit, configLimit);
        // enforce at the allocator
        allocator.setLimit(memoryLimit);
    }
    // TODO:  This functionality will be added later.
    if (hashAggrConfig.getGroupByExprs().size() == 0) {
        throw new IllegalArgumentException("Currently, hash aggregation is only applicable if there are group-by " + "expressions.");
    }
    htIdxHolder = new IndexPointer();
    materializedValueFields = new MaterializedField[valueFieldIds.size()];
    if (valueFieldIds.size() > 0) {
        int i = 0;
        FieldReference ref = new FieldReference("dummy", ExpressionPosition.UNKNOWN, valueFieldIds.get(0).getIntermediateType());
        for (TypedFieldId id : valueFieldIds) {
            materializedValueFields[i++] = MaterializedField.create(ref.getAsNamePart().getName(), id.getIntermediateType());
        }
    }
    spillSet = new SpillSet(context, hashAggrConfig);
    baseHashTable = new ChainedHashTable(htConfig, context, allocator,
        incoming, null /* no incoming probe */, outgoing);
    // retain these for delayedSetup, and to allow recreating hash tables (after a spill)
    this.groupByOutFieldIds = groupByOutFieldIds;
    numGroupByOutFields = groupByOutFieldIds.length;
    // Start calculating the row widths (with the extra columns; the rest would be done in updateEstMaxBatchSize())
    estRowWidth = extraRowBytes;
    estValuesRowWidth = extraRowBytes;
    try {
        doSetup(incoming);
    } catch (SchemaChangeException e) {
        throw HashAggBatch.schemaChangeException(e, "Hash Aggregate", logger);
    }
}
Also used: SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) FieldReference(org.apache.drill.common.expression.FieldReference) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) IndexPointer(org.apache.drill.exec.physical.impl.common.IndexPointer) SpillSet(org.apache.drill.exec.physical.impl.spill.SpillSet) ChainedHashTable(org.apache.drill.exec.physical.impl.common.ChainedHashTable)
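
The setup() above only constructs the SpillSet; actual spilling happens later, when memory pressure forces a partition to disk. For orientation, here is a minimal sketch of the SpillSet lifecycle as Drill's spilling operators use it. The method names (getNextSpillFile, openForOutput, openForInput, delete, close) come from Drill's SpillSet class, but treat the exact signatures as assumptions rather than a definitive reference.

import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import org.apache.drill.exec.physical.impl.spill.SpillSet;

public class SpillSetLifecycleSketch {

    // Round-trips a few bytes through a spill file. The SpillSet is assumed to
    // come from operator setup, e.g. 'new SpillSet(context, hashAggrConfig)' above.
    static void spillAndReadBack(SpillSet spillSet) throws Exception {
        // Ask the SpillSet for the next file name in its managed spill directory.
        String fileName = spillSet.getNextSpillFile();
        // Spill phase: operators serialize vector batches here; placeholder bytes stand in.
        try (OutputStream out = spillSet.openForOutput(fileName)) {
            out.write(new byte[] { 1, 2, 3 });
        }
        // Un-spill phase: read the data back when the partition is reprocessed.
        byte[] buf = new byte[3];
        try (InputStream in = spillSet.openForInput(fileName)) {
            int n = in.read(buf);
            assert n == 3 && Arrays.equals(buf, new byte[] { 1, 2, 3 });
        }
        // Clean up: remove the file, then release the SpillSet's resources.
        spillSet.delete(fileName);
        spillSet.close();
    }
}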

Example 3 with SpillSet

Use of org.apache.drill.exec.physical.impl.spill.SpillSet in project drill by axbaretto.

Class TestSortImpl, method makeSortImpl.

/**
 * Create the sort implementation to be used by test.
 *
 * @param fixture operator fixture
 * @param sortOrder sort order as specified by {@link Ordering}
 * @param nullOrder null order as specified by {@link Ordering}
 * @param outputBatch where the sort should write its output
 * @return the initialized sort implementation, ready to
 * do work
 */
public static SortImpl makeSortImpl(OperatorFixture fixture, String sortOrder, String nullOrder, VectorContainer outputBatch) {
    FieldReference expr = FieldReference.getWithQuotedRef("key");
    Ordering ordering = new Ordering(sortOrder, expr, nullOrder);
    Sort popConfig = new Sort(null, Lists.newArrayList(ordering), false);
    OperatorContext opContext = fixture.newOperatorContext(popConfig);
    QueryId queryId = QueryId.newBuilder().setPart1(1234).setPart2(5678).build();
    FragmentHandle handle = FragmentHandle.newBuilder().setMajorFragmentId(2).setMinorFragmentId(3).setQueryId(queryId).build();
    SortConfig sortConfig = new SortConfig(opContext.getFragmentContext().getConfig(), opContext.getFragmentContext().getOptions());
    SpillSet spillSet = new SpillSet(opContext.getFragmentContext().getConfig(), handle, popConfig);
    PriorityQueueCopierWrapper copierHolder = new PriorityQueueCopierWrapper(opContext);
    SpilledRuns spilledRuns = new SpilledRuns(opContext, spillSet, copierHolder);
    return new SortImpl(opContext, sortConfig, spilledRuns, outputBatch);
}
Also used: FieldReference(org.apache.drill.common.expression.FieldReference) OperatorContext(org.apache.drill.exec.ops.OperatorContext) QueryId(org.apache.drill.exec.proto.UserBitShared.QueryId) Ordering(org.apache.drill.common.logical.data.Order.Ordering) Sort(org.apache.drill.exec.physical.config.Sort) FragmentHandle(org.apache.drill.exec.proto.ExecProtos.FragmentHandle) SpillSet(org.apache.drill.exec.physical.impl.spill.SpillSet)
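
A test typically drives the returned SortImpl by declaring a schema, feeding input batches, and then merging. The sketch below follows the pattern used elsewhere in TestSortImpl (setSchema, addBatch, startMerge); the exact package and the SortResults methods are assumptions that may vary across Drill versions.

import org.apache.drill.exec.physical.impl.xsort.managed.SortImpl;
import org.apache.drill.exec.physical.impl.xsort.managed.SortImpl.SortResults;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.VectorAccessible;

public class SortImplUsageSketch {

    // Feeds one input batch to the sort and drains the sorted output.
    static void runSort(SortImpl sort, BatchSchema schema, VectorAccessible inputBatch) {
        // Declare the schema before the first batch arrives.
        sort.setSchema(schema);
        // Buffer (and possibly spill) the incoming batch.
        sort.addBatch(inputBatch);
        // Merge in-memory and spilled runs into the output container passed to makeSortImpl().
        SortResults results = sort.startMerge();
        while (results.next()) {
            // Each next() positions the output container on the next sorted batch.
        }
        results.close();
        sort.close();
    }
}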

Example 4 with SpillSet

Use of org.apache.drill.exec.physical.impl.spill.SpillSet in project drill by axbaretto.

Class HashAggTemplate, method setup.

@Override
public void setup(HashAggregate hashAggrConfig, HashTableConfig htConfig, FragmentContext context, OperatorContext oContext, RecordBatch incoming, HashAggBatch outgoing, LogicalExpression[] valueExprs, List<TypedFieldId> valueFieldIds, TypedFieldId[] groupByOutFieldIds, VectorContainer outContainer, int extraRowBytes) throws SchemaChangeException, IOException {
    if (valueExprs == null || valueFieldIds == null) {
        throw new IllegalArgumentException("Invalid aggr value exprs or workspace variables.");
    }
    if (valueFieldIds.size() < valueExprs.length) {
        throw new IllegalArgumentException("Wrong number of workspace variables.");
    }
    this.context = context;
    this.stats = oContext.getStats();
    this.allocator = oContext.getAllocator();
    this.oContext = oContext;
    this.incoming = incoming;
    this.outgoing = outgoing;
    this.outContainer = outContainer;
    this.operatorId = hashAggrConfig.getOperatorId();
    this.useMemoryPrediction = context.getOptions().getOption(ExecConstants.HASHAGG_USE_MEMORY_PREDICTION_VALIDATOR);
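    // Derive the phase flags: PHASE_1of1 is a single-phase aggregation; otherwise the
    // operator is one half of a two-phase aggregation (is1stPhase or is2ndPhase).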
    is2ndPhase = hashAggrConfig.getAggPhase() == AggPrelBase.OperatorPhase.PHASE_2of2;
    isTwoPhase = hashAggrConfig.getAggPhase() != AggPrelBase.OperatorPhase.PHASE_1of1;
    is1stPhase = isTwoPhase && !is2ndPhase;
    // a single-phase aggregation cannot spill
    canSpill = isTwoPhase;
    // Typically used for testing: force a spill once a partition accumulates more than this many batches
    minBatchesPerPartition = context.getOptions().getOption(ExecConstants.HASHAGG_MIN_BATCHES_PER_PARTITION_VALIDATOR);
    // Set the memory limit
    long memoryLimit = allocator.getLimit();
    // Optional configured memory limit, typically used only for testing.
    long configLimit = context.getOptions().getOption(ExecConstants.HASHAGG_MAX_MEMORY_VALIDATOR);
    if (configLimit > 0) {
        logger.warn("Memory limit was changed to {}", configLimit);
        memoryLimit = Math.min(memoryLimit, configLimit);
        // enforce at the allocator
        allocator.setLimit(memoryLimit);
    }
    // TODO:  This functionality will be added later.
    if (hashAggrConfig.getGroupByExprs().size() == 0) {
        throw new IllegalArgumentException("Currently, hash aggregation is only applicable if there are group-by " + "expressions.");
    }
    this.htIdxHolder = new IndexPointer();
    this.outStartIdxHolder = new IndexPointer();
    this.outNumRecordsHolder = new IndexPointer();
    materializedValueFields = new MaterializedField[valueFieldIds.size()];
    if (valueFieldIds.size() > 0) {
        int i = 0;
        FieldReference ref = new FieldReference("dummy", ExpressionPosition.UNKNOWN, valueFieldIds.get(0).getIntermediateType());
        for (TypedFieldId id : valueFieldIds) {
            materializedValueFields[i++] = MaterializedField.create(ref.getAsNamePart().getName(), id.getIntermediateType());
        }
    }
    spillSet = new SpillSet(context, hashAggrConfig);
    baseHashTable = new ChainedHashTable(htConfig, context, allocator,
        incoming, null /* no incoming probe */, outgoing);
    // retain these for delayedSetup, and to allow recreating hash tables (after a spill)
    this.groupByOutFieldIds = groupByOutFieldIds;
    numGroupByOutFields = groupByOutFieldIds.length;
    // Start calculating the row widths (with the extra columns; the rest would be done in updateEstMaxBatchSize())
    estRowWidth = extraRowBytes;
    estValuesRowWidth = extraRowBytes;
    doSetup(incoming);
}
Also used : FieldReference(org.apache.drill.common.expression.FieldReference) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) IndexPointer(org.apache.drill.exec.physical.impl.common.IndexPointer) SpillSet(org.apache.drill.exec.physical.impl.spill.SpillSet) ChainedHashTable(org.apache.drill.exec.physical.impl.common.ChainedHashTable)

Example 5 with SpillSet

Use of org.apache.drill.exec.physical.impl.spill.SpillSet in project drill by apache.

Class TestSortImpl, method makeSortImpl.

/**
 * Create the sort implementation to be used by test.
 *
 * @param fixture operator fixture
 * @param sortOrder sort order as specified by {@link Ordering}
 * @param nullOrder null order as specified by {@link Ordering}
 * @return the initialized sort implementation, ready to do work
 */
public static SortImpl makeSortImpl(OperatorFixture fixture, String sortOrder, String nullOrder) {
    FieldReference expr = FieldReference.getWithQuotedRef("key");
    Ordering ordering = new Ordering(sortOrder, expr, nullOrder);
    Sort popConfig = new Sort(null, Lists.newArrayList(ordering), false);
    OperatorContext opContext = fixture.newOperatorContext(popConfig);
    QueryId queryId = QueryId.newBuilder().setPart1(1234).setPart2(5678).build();
    FragmentHandle handle = FragmentHandle.newBuilder().setMajorFragmentId(2).setMinorFragmentId(3).setQueryId(queryId).build();
    SortConfig sortConfig = new SortConfig(opContext.getFragmentContext().getConfig(), opContext.getFragmentContext().getOptions());
    SpillSet spillSet = new SpillSet(opContext.getFragmentContext().getConfig(), handle, popConfig);
    PriorityQueueCopierWrapper copierHolder = new PriorityQueueCopierWrapper(opContext);
    SpilledRuns spilledRuns = new SpilledRuns(opContext, spillSet, copierHolder);
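    // Note: 'dest' is a class-level field of TestSortImpl, not a local variable.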
    dest = new VectorContainer(opContext.getAllocator());
    return new SortImpl(opContext, sortConfig, spilledRuns, dest);
}
Also used: FieldReference(org.apache.drill.common.expression.FieldReference) OperatorContext(org.apache.drill.exec.ops.OperatorContext) QueryId(org.apache.drill.exec.proto.UserBitShared.QueryId) Ordering(org.apache.drill.common.logical.data.Order.Ordering) Sort(org.apache.drill.exec.physical.config.Sort) FragmentHandle(org.apache.drill.exec.proto.ExecProtos.FragmentHandle) SpillSet(org.apache.drill.exec.physical.impl.spill.SpillSet) VectorContainer(org.apache.drill.exec.record.VectorContainer)

Aggregations

SpillSet (org.apache.drill.exec.physical.impl.spill.SpillSet): 6 uses
FieldReference (org.apache.drill.common.expression.FieldReference): 4 uses
OperatorContext (org.apache.drill.exec.ops.OperatorContext): 3 uses
Ordering (org.apache.drill.common.logical.data.Order.Ordering): 2 uses
Sort (org.apache.drill.exec.physical.config.Sort): 2 uses
ChainedHashTable (org.apache.drill.exec.physical.impl.common.ChainedHashTable): 2 uses
IndexPointer (org.apache.drill.exec.physical.impl.common.IndexPointer): 2 uses
FragmentHandle (org.apache.drill.exec.proto.ExecProtos.FragmentHandle): 2 uses
QueryId (org.apache.drill.exec.proto.UserBitShared.QueryId): 2 uses
TypedFieldId (org.apache.drill.exec.record.TypedFieldId): 2 uses
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 1 use
FragmentContext (org.apache.drill.exec.ops.FragmentContext): 1 use
MockRecordBatch (org.apache.drill.exec.physical.impl.MockRecordBatch): 1 use
SpilledRecordBatch (org.apache.drill.exec.physical.impl.aggregate.SpilledRecordBatch): 1 use
HashJoinMemoryCalculatorImpl (org.apache.drill.exec.physical.impl.join.HashJoinMemoryCalculatorImpl): 1 use
DirectRowSet (org.apache.drill.exec.physical.rowSet.DirectRowSet): 1 use
RowSet (org.apache.drill.exec.physical.rowSet.RowSet): 1 use
RowSetBuilder (org.apache.drill.exec.physical.rowSet.RowSetBuilder): 1 use
BatchSchema (org.apache.drill.exec.record.BatchSchema): 1 use
CloseableRecordBatch (org.apache.drill.exec.record.CloseableRecordBatch): 1 use