Example 1 with IndexPointer

Use of org.apache.drill.exec.physical.impl.common.IndexPointer in the Apache Drill project.
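
For context: both examples use IndexPointer as a mutable index holder that callees write into instead of returning a value. A minimal sketch of the class, inferred from its usage below (htIndex.value is read and written directly); the actual class in the Drill source may carry more than this:

public class IndexPointer {

    // Mutable holder for an int index. Callees such as HashTable.put write
    // the key's global index into this field rather than returning it.
    public int value;
}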

From the class HashAggTemplate, the setup method:

@Override
public void setup(HashAggregate hashAggrConfig, HashTableConfig htConfig, FragmentContext context,
        OperatorStats stats, BufferAllocator allocator, RecordBatch incoming, HashAggBatch outgoing,
        LogicalExpression[] valueExprs, List<TypedFieldId> valueFieldIds, TypedFieldId[] groupByOutFieldIds,
        VectorContainer outContainer) throws SchemaChangeException, ClassTransformationException, IOException {
    if (valueExprs == null || valueFieldIds == null) {
        throw new IllegalArgumentException("Invalid aggr value exprs or workspace variables.");
    }
    if (valueFieldIds.size() < valueExprs.length) {
        throw new IllegalArgumentException("Wrong number of workspace variables.");
    }
    //    this.context = context;
    this.stats = stats;
    this.allocator = allocator;
    this.incoming = incoming;
    //    this.schema = incoming.getSchema();
    this.outgoing = outgoing;
    this.outContainer = outContainer;
    // TODO:  This functionality will be added later.
    if (hashAggrConfig.getGroupByExprs().isEmpty()) {
        throw new IllegalArgumentException("Currently, hash aggregation is only applicable if there are group-by expressions.");
    }
    this.htIdxHolder = new IndexPointer();
    this.outStartIdxHolder = new IndexPointer();
    this.outNumRecordsHolder = new IndexPointer();
    materializedValueFields = new MaterializedField[valueFieldIds.size()];
    if (valueFieldIds.size() > 0) {
        int i = 0;
        // Only the intermediate types matter here; "dummy" is a placeholder name
        FieldReference ref = new FieldReference("dummy", ExpressionPosition.UNKNOWN, valueFieldIds.get(0).getIntermediateType());
        for (TypedFieldId id : valueFieldIds) {
            materializedValueFields[i++] = MaterializedField.create(ref.getAsNamePart().getName(), id.getIntermediateType());
        }
    }
    ChainedHashTable ht = new ChainedHashTable(htConfig, context, allocator, incoming, null /* no incoming probe */, outgoing);
    this.htable = ht.createAndSetupHashTable(groupByOutFieldIds);
    numGroupByOutFields = groupByOutFieldIds.length;
    batchHolders = new ArrayList<BatchHolder>();
    // First BatchHolder is created when the first put request is received.
    doSetup(incoming);
}
Also used: FieldReference (org.apache.drill.common.expression.FieldReference), TypedFieldId (org.apache.drill.exec.record.TypedFieldId), IndexPointer (org.apache.drill.exec.physical.impl.common.IndexPointer), ChainedHashTable (org.apache.drill.exec.physical.impl.common.ChainedHashTable)
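
The three IndexPointer fields created in setup (htIdxHolder, outStartIdxHolder, outNumRecordsHolder) serve as out-parameters: later calls write indexes into them so a single call can hand back several values. A self-contained sketch of that idiom; OutParameterDemo, locateBatch, and the fixed batch size are hypothetical illustrations, not Drill API:

public class OutParameterDemo {

    // Stand-in for org.apache.drill.exec.physical.impl.common.IndexPointer (see sketch above).
    static class IndexPointer {
        public int value;
    }

    // The callee "returns" two values by writing into the holders, the way the
    // hash table fills outStartIdxHolder and outNumRecordsHolder during output.
    static void locateBatch(int batchIndex, IndexPointer outStartIdx, IndexPointer outNumRecords) {
        // Hypothetical layout: fixed-size batches of 1024 records.
        outStartIdx.value = batchIndex * 1024;
        outNumRecords.value = 1024;
    }

    public static void main(String[] args) {
        IndexPointer start = new IndexPointer();
        IndexPointer count = new IndexPointer();
        locateBatch(3, start, count);
        System.out.println("start=" + start.value + ", count=" + count.value); // start=3072, count=1024
    }
}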

Example 2 with IndexPointer

Use of org.apache.drill.exec.physical.impl.common.IndexPointer in the Apache Drill project.

From the class HashJoinBatch, the executeBuildPhase method:

public void executeBuildPhase() throws SchemaChangeException, ClassTransformationException, IOException {
    // skip first batch if count is zero, as it may be an empty schema batch
    if (right.getRecordCount() == 0) {
        for (final VectorWrapper<?> w : right) {
            w.clear();
        }
        rightUpstream = next(right);
    }
    boolean moreData = true;
    while (moreData) {
        switch(rightUpstream) {
            case OUT_OF_MEMORY:
            case NONE:
            case NOT_YET:
            case STOP:
                moreData = false;
                continue;
            case OK_NEW_SCHEMA:
                if (rightSchema == null) {
                    rightSchema = right.getSchema();
                    if (rightSchema.getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
                        final String errorMsg = "Hash join does not support build batch with selection vectors. Build batch has selection mode = " + rightSchema.getSelectionVectorMode();
                        throw new SchemaChangeException(errorMsg);
                    }
                    setupHashTable();
                } else {
                    if (!rightSchema.equals(right.getSchema())) {
                        throw SchemaChangeException.schemaChanged("Hash join does not support schema changes in build side.", rightSchema, right.getSchema());
                    }
                    hashTable.updateBatches();
                }
            // Fall through
            case OK:
                final int currentRecordCount = right.getRecordCount();
                // For every new build batch, register its state with the helper context
                hjHelper.addNewBatch(currentRecordCount);
                // Holder for the global index at which the hash table stores the key
                final IndexPointer htIndex = new IndexPointer();
                // For every record in the build batch, hash the key columns
                for (int i = 0; i < currentRecordCount; i++) {
                    hashTable.put(i, htIndex, 1);
                    /* Use the global index returned by the hash table to store
                     * the current record index and batch index. This will be used
                     * later when we probe and find a match.
                     */
                    hjHelper.setCurrentIndex(htIndex.value, buildBatchIndex, i);
                }
                /* Completed hashing all records in this batch. Transfer the batch
                 * to the hyper vector container. It will be used when we want to
                 * retrieve records that have matching keys on the probe side.
                 */
                final RecordBatchData nextBatch = new RecordBatchData(right, oContext.getAllocator());
                boolean success = false;
                try {
                    if (hyperContainer == null) {
                        hyperContainer = new ExpandableHyperContainer(nextBatch.getContainer());
                    } else {
                        hyperContainer.addBatch(nextBatch.getContainer());
                    }
                    // completed processing a batch, increment batch index
                    buildBatchIndex++;
                    success = true;
                } finally {
                    if (!success) {
                        nextBatch.clear();
                    }
                }
                break;
        }
        // Get the next record batch
        rightUpstream = next(HashJoinHelper.RIGHT_INPUT, right);
    }
}
Also used: SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), ExpandableHyperContainer (org.apache.drill.exec.record.ExpandableHyperContainer), RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData), IndexPointer (org.apache.drill.exec.physical.impl.common.IndexPointer)
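
The heart of executeBuildPhase is this pattern: hash each build-side row, let the hash table report the row's global index through an IndexPointer, then record the (batch, row) position under that index for the probe phase. A self-contained sketch under those assumptions; MiniHashTable and the links map are hypothetical stand-ins, not Drill classes:

import java.util.HashMap;
import java.util.Map;

public class BuildPhaseDemo {

    // Stand-in for org.apache.drill.exec.physical.impl.common.IndexPointer.
    static class IndexPointer {
        public int value;
    }

    // Toy hash table: assigns a stable global index per distinct key and
    // reports it through the IndexPointer, like HashTable.put in the example.
    static class MiniHashTable {
        private final Map<String, Integer> keyToIndex = new HashMap<>();

        void put(String key, IndexPointer htIndex) {
            htIndex.value = keyToIndex.computeIfAbsent(key, k -> keyToIndex.size());
        }
    }

    public static void main(String[] args) {
        MiniHashTable table = new MiniHashTable();
        IndexPointer htIndex = new IndexPointer();

        String[][] batches = { { "a", "b", "a" }, { "b", "c" } };
        // links maps a key's global index to "batch:row", mimicking
        // hjHelper.setCurrentIndex(htIndex.value, buildBatchIndex, i).
        Map<Integer, String> links = new HashMap<>();

        for (int batch = 0; batch < batches.length; batch++) {
            for (int row = 0; row < batches[batch].length; row++) {
                table.put(batches[batch][row], htIndex);
                // Remember where this key's row lives, keyed by the global index.
                links.put(htIndex.value, batch + ":" + row);
            }
        }
        System.out.println(links); // e.g. {0=0:2, 1=1:0, 2=1:1}
    }
}

On the probe side, a matching key's global index then leads straight back to the build-side rows that produced it.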

Aggregations

IndexPointer (org.apache.drill.exec.physical.impl.common.IndexPointer): 2 uses
FieldReference (org.apache.drill.common.expression.FieldReference): 1 use
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 1 use
ChainedHashTable (org.apache.drill.exec.physical.impl.common.ChainedHashTable): 1 use
RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData): 1 use
ExpandableHyperContainer (org.apache.drill.exec.record.ExpandableHyperContainer): 1 use
TypedFieldId (org.apache.drill.exec.record.TypedFieldId): 1 use