Search in sources :

Example 71 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class QueryBuilder method queryPlan.

/**
   * Submit an "EXPLAIN" statement, and return the column value which
   * contains the plan's string.
   * <p>
   * Every batch that carries data is loaded, the values of the requested
   * column are appended to the result in row order, and the loader and the
   * batch are released again even when reading the batch fails.
   * <p>
   * Cribbed from {@link PlanTestBase#getPlanInString(String, String)}
   *
   * @param columnName name of the result column whose values make up the plan text
   * @return the concatenation of that column's values over all data-bearing batches
   * @throws IllegalStateException if the requested column cannot be accessed in a
   *         loaded batch; the underlying failure is attached as the cause
   * @throws Exception if anything goes wrong in the query
   */
protected String queryPlan(String columnName) throws Exception {
    Preconditions.checkArgument(queryType == QueryType.SQL, "Can only explan an SQL query.");
    final List<QueryDataBatch> results = results();
    final RecordBatchLoader loader = new RecordBatchLoader(client.allocator());
    final StringBuilder builder = new StringBuilder();
    for (final QueryDataBatch b : results) {
        if (!b.hasData()) {
            continue;
        }
        try {
            loader.load(b.getHeader().getDef(), b.getData());
            final VectorWrapper<?> vw;
            try {
                vw = loader.getValueAccessorById(NullableVarCharVector.class, loader.getValueVectorId(SchemaPath.getSimplePath(columnName)).getFieldIds());
            } catch (Throwable t) {
                // Keep the original failure as the cause so the real reason is not lost.
                throw new IllegalStateException("Looks like you did not provide an explain plan query, please add EXPLAIN PLAN FOR to the beginning of your query.", t);
            }
            @SuppressWarnings("resource") final ValueVector vv = vw.getValueVector();
            for (int i = 0; i < vv.getAccessor().getValueCount(); i++) {
                final Object o = vv.getAccessor().getObject(i);
                builder.append(o);
            }
        } finally {
            // Release the loaded vectors and the batch buffer on every exit path,
            // including when loading or reading the batch throws.
            loader.clear();
            b.release();
        }
    }
    return builder.toString();
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) NullableVarCharVector(org.apache.drill.exec.vector.NullableVarCharVector) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader)

Example 72 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class HBaseRecordReader method next.

/**
 * Reads rows from the result scanner into the row-key and column-family
 * vectors until the scanner is exhausted or {@code canAddNewRow} reports that
 * no further row may be added.
 *
 * @return the number of rows written in this call
 * @throws DrillRuntimeException if the scanner fails with an {@link IOException}
 */
@Override
public int next() {
    final Stopwatch timer = Stopwatch.createStarted();

    // Start the batch with freshly allocated vectors.
    if (rowKeyVector != null) {
        rowKeyVector.clear();
        rowKeyVector.allocateNew();
    }
    for (ValueVector familyVector : familyVectorMap.values()) {
        familyVector.clear();
        familyVector.allocateNew();
    }

    int rowCount = 0;
    // The row limit is only checked after a row has been written, so the first
    // row is always added even if it alone exceeds the allowed batch memory.
    while (true) {
        final OperatorStats stats;
        if (operatorContext != null) {
            stats = operatorContext.getStats();
        } else {
            stats = null;
        }

        Result row = null;
        try {
            // Time spent waiting on the scanner is recorded as wait time when stats exist.
            if (stats != null) {
                stats.startWait();
            }
            try {
                row = resultScanner.next();
            } finally {
                if (stats != null) {
                    stats.stopWait();
                }
            }
        } catch (IOException e) {
            throw new DrillRuntimeException(e);
        }

        // A null result ends the batch.
        if (row == null) {
            break;
        }

        // Copy the row key and, unless only row keys are wanted, every cell value.
        final Cell[] rowCells = row.rawCells();
        if (rowKeyVector != null) {
            final Cell first = rowCells[0];
            rowKeyVector.getMutator().setSafe(rowCount, first.getRowArray(), first.getRowOffset(), first.getRowLength());
        }
        if (!rowKeyOnly) {
            for (final Cell current : rowCells) {
                final String familyName = new String(current.getFamilyArray(), current.getFamilyOffset(), current.getFamilyLength());
                final MapVector familyVector = getOrCreateFamilyVector(familyName, true);

                final String qualifierName = new String(current.getQualifierArray(), current.getQualifierOffset(), current.getQualifierLength());
                final NullableVarBinaryVector columnVector = getOrCreateColumnVector(familyVector, qualifierName);

                columnVector.getMutator().setSafe(rowCount, current.getValueArray(), current.getValueOffset(), current.getValueLength());
            }
        }
        rowCount++;

        if (!canAddNewRow(rowCount)) {
            break;
        }
    }

    setOutputRowCount(rowCount);
    logger.debug("Took {} ms to get {} records", timer.elapsed(TimeUnit.MILLISECONDS), rowCount);
    return rowCount;
}
Also used : NullableVarBinaryVector(org.apache.drill.exec.vector.NullableVarBinaryVector) Stopwatch(com.google.common.base.Stopwatch) OperatorStats(org.apache.drill.exec.ops.OperatorStats) IOException(java.io.IOException) Result(org.apache.hadoop.hbase.client.Result) ValueVector(org.apache.drill.exec.vector.ValueVector) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) Cell(org.apache.hadoop.hbase.Cell) MapVector(org.apache.drill.exec.vector.complex.MapVector)

Example 73 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class TestOrderedBytesConvertFunctions method getRunResult.

/**
 * Runs the given plan and collects the values of the first vector of every
 * result batch that carries data.
 * <p>
 * Values of a {@code VarCharVector} are converted from their raw bytes to a
 * {@code String}; values of any other vector type are taken as returned by
 * the accessor's {@code getObject}.
 *
 * @param queryType  type of the query to run
 * @param planString the plan or query text to run
 * @return the collected values, in batch order and then row order
 * @throws Exception if running the query or loading a batch fails
 */
protected Object[] getRunResult(QueryType queryType, String planString) throws Exception {
    final List<QueryDataBatch> batches = testRunAndReturn(queryType, planString);
    final List<Object> collected = new ArrayList<Object>();
    final RecordBatchLoader batchLoader = new RecordBatchLoader(getAllocator());

    for (final QueryDataBatch batch : batches) {
        // Batches without data are skipped entirely.
        if (batch.getData() == null) {
            continue;
        }
        batchLoader.load(batch.getHeader().getDef(), batch.getData());
        final ValueVector firstColumn = batchLoader.iterator().next().getValueVector();
        final boolean isVarChar = firstColumn instanceof VarCharVector;

        int row = 0;
        while (row < firstColumn.getAccessor().getValueCount()) {
            final Object value;
            if (isVarChar) {
                value = new String(((VarCharVector) firstColumn).getAccessor().get(row));
            } else {
                value = firstColumn.getAccessor().getObject(row);
            }
            collected.add(value);
            row++;
        }

        batchLoader.clear();
        batch.release();
    }
    return collected.toArray();
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) ArrayList(java.util.ArrayList) VarCharVector(org.apache.drill.exec.vector.VarCharVector)

Example 74 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class PruneScanRule method doOnMatch.

/**
 * Tries to prune partitions of a scan using the filter placed above it.
 * <p>
 * Outline of what the method does:
 * <ol>
 *   <li>Finds which scan columns are partition columns; stops if there are none.</li>
 *   <li>Extracts from the filter condition the part that can be used for pruning
 *       (via {@code FindPartitionConditions}); stops if there is none.</li>
 *   <li>For each batch of partition locations supplied by the descriptor, fills
 *       vectors with the partition column values, evaluates the prune expression
 *       with {@code InterpreterEvaluator} and keeps the partitions whose result
 *       bit is non-null and equal to 1.</li>
 *   <li>If some partitions were eliminated, builds a new table scan over the
 *       remaining ones (re-adding the project, and the filter unless it can be
 *       dropped) and registers it through {@code call.transformTo}.</li>
 * </ol>
 * Exceptions raised while evaluating or while building the new scan are logged
 * and not rethrown; in those cases no pruned scan is produced by this call.
 *
 * @param call       the rule call used to obtain the planner and to register the result
 * @param filterRel  the filter whose condition drives the pruning
 * @param projectRel the project between filter and scan, or {@code null} when there is none
 * @param scanRel    the table scan whose partitions are candidates for pruning
 */
protected void doOnMatch(RelOptRuleCall call, Filter filterRel, Project projectRel, TableScan scanRel) {
    final String pruningClassName = getClass().getName();
    logger.info("Beginning partition pruning, pruning class: {}", pruningClassName);
    Stopwatch totalPruningTime = Stopwatch.createStarted();
    final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
    PartitionDescriptor descriptor = getPartitionDescriptor(settings, scanRel);
    final BufferAllocator allocator = optimizerContext.getAllocator();
    final Object selection = getDrillTable(scanRel).getSelection();
    // A metadata context is only available for a FormatSelection; otherwise it stays null.
    MetadataContext metaContext = null;
    if (selection instanceof FormatSelection) {
        metaContext = ((FormatSelection) selection).getSelection().getMetaContext();
    }
    RexNode condition = null;
    if (projectRel == null) {
        condition = filterRel.getCondition();
    } else {
        // get the filter as if it were below the projection.
        condition = RelOptUtil.pushFilterPastProject(filterRel.getCondition(), projectRel);
    }
    // Rewrite the condition before analysis; the opposite rewrite
    // (RewriteCombineBinaryOperators) is applied near the end of this method.
    RewriteAsBinaryOperators visitor = new RewriteAsBinaryOperators(true, filterRel.getCluster().getRexBuilder());
    condition = condition.accept(visitor);
    // partition index -> scan field name
    Map<Integer, String> fieldNameMap = Maps.newHashMap();
    List<String> fieldNames = scanRel.getRowType().getFieldNames();
    // positions (within the scan row type) of the fields that are partition columns
    BitSet columnBitset = new BitSet();
    // partition indexes of those same fields
    BitSet partitionColumnBitSet = new BitSet();
    // scan field position -> partition index
    Map<Integer, Integer> partitionMap = Maps.newHashMap();
    int relColIndex = 0;
    for (String field : fieldNames) {
        final Integer partitionIndex = descriptor.getIdIfValid(field);
        if (partitionIndex != null) {
            fieldNameMap.put(partitionIndex, field);
            partitionColumnBitSet.set(partitionIndex);
            columnBitset.set(relColIndex);
            // mapping between the relColIndex and partitionIndex
            partitionMap.put(relColIndex, partitionIndex);
        }
        relColIndex++;
    }
    if (partitionColumnBitSet.isEmpty()) {
        logger.info("No partition columns are projected from the scan..continue. " + "Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
        setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
        return;
    }
    // stop watch to track how long we spend in different phases of pruning
    Stopwatch miscTimer = Stopwatch.createUnstarted();
    // track how long we spend building the filter tree
    miscTimer.start();
    FindPartitionConditions c = new FindPartitionConditions(columnBitset, filterRel.getCluster().getRexBuilder());
    c.analyze(condition);
    RexNode pruneCondition = c.getFinalCondition();
    BitSet referencedDirsBitSet = c.getReferencedDirs();
    logger.info("Total elapsed time to build and analyze filter tree: {} ms", miscTimer.elapsed(TimeUnit.MILLISECONDS));
    miscTimer.reset();
    if (pruneCondition == null) {
        logger.info("No conditions were found eligible for partition pruning." + "Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
        setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
        return;
    }
    // set up the partitions
    List<PartitionLocation> newPartitions = Lists.newArrayList();
    // total number of partitions
    long numTotal = 0;
    int batchIndex = 0;
    PartitionLocation firstLocation = null;
    LogicalExpression materializedExpr = null;
    // Partition values of the first qualifying composite partition; later qualifying
    // partitions are compared against it position by position.
    String[] spInfo = null;
    // Largest index returned by composePartition over all qualifying composite partitions.
    int maxIndex = -1;
    // Positions at which every qualifying composite partition seen so far has the
    // same non-null value as spInfo.
    BitSet matchBitSet = new BitSet();
    // Outer loop: iterate over a list of batches of PartitionLocations
    for (List<PartitionLocation> partitions : descriptor) {
        numTotal += partitions.size();
        logger.debug("Evaluating partition pruning for batch {}", batchIndex);
        if (batchIndex == 0) {
            // save the first location in case everything is pruned
            firstLocation = partitions.get(0);
        }
        // Receives the result of evaluating the prune expression, one entry per partition in this batch.
        final NullableBitVector output = new NullableBitVector(MaterializedField.create("", Types.optional(MinorType.BIT)), allocator);
        final VectorContainer container = new VectorContainer();
        try {
            // Indexed by partition index; only the slots of projected partition columns are filled.
            final ValueVector[] vectors = new ValueVector[descriptor.getMaxHierarchyLevel()];
            for (int partitionColumnIndex : BitSets.toIter(partitionColumnBitSet)) {
                SchemaPath column = SchemaPath.getSimplePath(fieldNameMap.get(partitionColumnIndex));
                MajorType type = descriptor.getVectorType(column, settings);
                MaterializedField field = MaterializedField.create(column.getAsUnescapedPath(), type);
                ValueVector v = TypeHelper.getNewVector(field, allocator);
                v.allocateNew();
                vectors[partitionColumnIndex] = v;
                container.add(v);
            }
            // track how long we spend populating partition column vectors
            miscTimer.start();
            // populate partition vectors.
            descriptor.populatePartitionVectors(vectors, partitions, partitionColumnBitSet, fieldNameMap);
            logger.info("Elapsed time to populate partitioning column vectors: {} ms within batchIndex: {}", miscTimer.elapsed(TimeUnit.MILLISECONDS), batchIndex);
            miscTimer.reset();
            // materialize the expression; only need to do this once
            if (batchIndex == 0) {
                materializedExpr = materializePruneExpr(pruneCondition, settings, scanRel, container);
                if (materializedExpr == null) {
                    // continue without partition pruning; no need to log anything here since
                    // materializePruneExpr logs it already
                    logger.info("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
                    setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
                    return;
                }
            }
            output.allocateNew(partitions.size());
            // start the timer to evaluate how long we spend in the interpreter evaluation
            miscTimer.start();
            InterpreterEvaluator.evaluate(partitions.size(), optimizerContext, container, output, materializedExpr);
            logger.info("Elapsed time in interpreter evaluation: {} ms within batchIndex: {} with # of partitions : {}", miscTimer.elapsed(TimeUnit.MILLISECONDS), batchIndex, partitions.size());
            miscTimer.reset();
            // recordCount doubles as the row index into 'output' while walking the partitions.
            int recordCount = 0;
            int qualifiedCount = 0;
            if (descriptor.supportsMetadataCachePruning() && partitions.get(0).isCompositePartition()) /* apply single partition check only for composite partitions */
            {
                // Inner loop: within each batch iterate over the PartitionLocations
                for (PartitionLocation part : partitions) {
                    assert part.isCompositePartition();
                    // A partition qualifies only when its result is non-null and equal to 1.
                    if (!output.getAccessor().isNull(recordCount) && output.getAccessor().get(recordCount) == 1) {
                        newPartitions.add(part);
                        // Rather than using the PartitionLocation, get the array of partition values for the directories that are
                        // referenced by the filter since we are not interested in directory references in other parts of the query.
                        Pair<String[], Integer> p = composePartition(referencedDirsBitSet, partitionMap, vectors, recordCount);
                        String[] parts = p.getLeft();
                        int tmpIndex = p.getRight();
                        maxIndex = Math.max(maxIndex, tmpIndex);
                        if (spInfo == null) {
                            // initialization
                            spInfo = parts;
                            for (int j = 0; j <= tmpIndex; j++) {
                                if (parts[j] != null) {
                                    matchBitSet.set(j);
                                }
                            }
                        } else {
                            // compare the new partition with existing partition
                            for (int j = 0; j <= tmpIndex; j++) {
                                if (parts[j] == null || spInfo[j] == null) {
                                    // nulls don't match
                                    matchBitSet.clear(j);
                                } else {
                                    if (!parts[j].equals(spInfo[j])) {
                                        matchBitSet.clear(j);
                                    }
                                }
                            }
                        }
                        qualifiedCount++;
                    }
                    recordCount++;
                }
            } else {
                // Inner loop: within each batch iterate over the PartitionLocations
                for (PartitionLocation part : partitions) {
                    if (!output.getAccessor().isNull(recordCount) && output.getAccessor().get(recordCount) == 1) {
                        newPartitions.add(part);
                        qualifiedCount++;
                    }
                    recordCount++;
                }
            }
            logger.debug("Within batch {}: total records: {}, qualified records: {}", batchIndex, recordCount, qualifiedCount);
            batchIndex++;
        } catch (Exception e) {
            logger.warn("Exception while trying to prune partition.", e);
            logger.info("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
            setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
            // continue without partition pruning
            return;
        } finally {
            // Runs for normal completion, the early returns above and the catch path,
            // so the per-batch vectors are always cleared.
            container.clear();
            if (output != null) {
                output.clear();
            }
        }
    }
    try {
        // Every partition qualified, so there is nothing to prune.
        if (newPartitions.size() == numTotal) {
            logger.info("No partitions were eligible for pruning");
            return;
        }
        // handle the case all partitions are filtered out.
        boolean canDropFilter = true;
        boolean wasAllPartitionsPruned = false;
        String cacheFileRoot = null;
        if (newPartitions.isEmpty()) {
            assert firstLocation != null;
            // Add the first non-composite partition location, since execution requires schema.
            // In such case, we should not drop filter.
            newPartitions.add(firstLocation.getPartitionLocationRecursive().get(0));
            canDropFilter = false;
            // NOTE: with DRILL-4530, the PruneScanRule may be called with only a list of
            // directories first and the non-composite partition location will still return
            // directories, not files.  So, additional processing is done depending on this flag
            wasAllPartitionsPruned = true;
            logger.info("All {} partitions were pruned; added back a single partition to allow creating a schema", numTotal);
            // set the cacheFileRoot appropriately
            if (firstLocation.isCompositePartition()) {
                cacheFileRoot = descriptor.getBaseTableLocation() + firstLocation.getCompositePartitionPath();
            }
        }
        logger.info("Pruned {} partitions down to {}", numTotal, newPartitions.size());
        // Remove the conjuncts used for pruning from the full condition; what is left
        // decides below whether the filter can be dropped.
        List<RexNode> conjuncts = RelOptUtil.conjunctions(condition);
        List<RexNode> pruneConjuncts = RelOptUtil.conjunctions(pruneCondition);
        conjuncts.removeAll(pruneConjuncts);
        RexNode newCondition = RexUtil.composeConjunction(filterRel.getCluster().getRexBuilder(), conjuncts, false);
        RewriteCombineBinaryOperators reverseVisitor = new RewriteCombineBinaryOperators(true, filterRel.getCluster().getRexBuilder());
        // NOTE(review): the two rewritten values below are not read again in this method.
        condition = condition.accept(reverseVisitor);
        pruneCondition = pruneCondition.accept(reverseVisitor);
        if (descriptor.supportsMetadataCachePruning() && !wasAllPartitionsPruned) {
            // if metadata cache file could potentially be used, then assign a proper cacheFileRoot
            int index = -1;
            if (!matchBitSet.isEmpty()) {
                String path = "";
                index = matchBitSet.length() - 1;
                for (int j = 0; j < matchBitSet.length(); j++) {
                    if (!matchBitSet.get(j)) {
                        // stop at the first index with no match and use the immediate
                        // previous index
                        index = j - 1;
                        break;
                    }
                }
                // Build the path from the leading run of matching partition values.
                for (int j = 0; j <= index; j++) {
                    path += "/" + spInfo[j];
                }
                cacheFileRoot = descriptor.getBaseTableLocation() + path;
            }
            if (index != maxIndex) {
                // if multiple partitions are being selected, we should not drop the filter
                // since we are reading the cache file at a parent/ancestor level
                canDropFilter = false;
            }
        }
        RelNode inputRel = descriptor.supportsMetadataCachePruning() ? descriptor.createTableScan(newPartitions, cacheFileRoot, wasAllPartitionsPruned, metaContext) : descriptor.createTableScan(newPartitions, wasAllPartitionsPruned);
        if (projectRel != null) {
            inputRel = projectRel.copy(projectRel.getTraitSet(), Collections.singletonList(inputRel));
        }
        // The filter is dropped only when nothing remains of the condition after
        // removing the prune conjuncts and dropping was not ruled out above.
        if (newCondition.isAlwaysTrue() && canDropFilter) {
            call.transformTo(inputRel);
        } else {
            // The new filter is a copy of filterRel over the new input; newCondition
            // itself is only used for the always-true check above.
            final RelNode newFilter = filterRel.copy(filterRel.getTraitSet(), Collections.singletonList(inputRel));
            call.transformTo(newFilter);
        }
        setPruneStatus(metaContext, PruneStatus.PRUNED);
    } catch (Exception e) {
        logger.warn("Exception while using the pruned partitions.", e);
    } finally {
        logger.info("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
    }
}
Also used : PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) Stopwatch(com.google.common.base.Stopwatch) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) NullableBitVector(org.apache.drill.exec.vector.NullableBitVector) SchemaPath(org.apache.drill.common.expression.SchemaPath) PartitionDescriptor(org.apache.drill.exec.planner.PartitionDescriptor) FileSystemPartitionDescriptor(org.apache.drill.exec.planner.FileSystemPartitionDescriptor) PartitionLocation(org.apache.drill.exec.planner.PartitionLocation) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) BitSet(java.util.BitSet) MaterializedField(org.apache.drill.exec.record.MaterializedField) BufferAllocator(org.apache.drill.exec.memory.BufferAllocator) VectorContainer(org.apache.drill.exec.record.VectorContainer) ValueVector(org.apache.drill.exec.vector.ValueVector) RelNode(org.apache.calcite.rel.RelNode) MetadataContext(org.apache.drill.exec.store.dfs.MetadataContext) RexNode(org.apache.calcite.rex.RexNode)

Example 75 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class RecordBatchLoader method load.

/**
   * Load a record batch from a single buffer.
   * <p>
   * Vectors from the previous load are reused when a field with the same path
   * and type is present again; vectors whose field disappeared or changed type
   * are cleared. Either case, as well as a newly appearing field or the absence
   * of a previous schema, is reported as a schema change.
   *
   * @param def
   *          The definition for the record batch.
   * @param buf
   *          The buffer that holds the data associated with the record batch.
   * @return Whether the schema changed since the previous load.
   * @throws SchemaChangeException
   *   TODO:  Clean:  DRILL-2933  load(...) never actually throws SchemaChangeException.
   */
@SuppressWarnings("resource")
public boolean load(RecordBatchDef def, DrillBuf buf) throws SchemaChangeException {
    if (logger.isTraceEnabled()) {
        logger.trace("Loading record batch with def {} and data {}", def, buf);
        logger.trace("Load, ThreadID: {}\n{}", Thread.currentThread().getId(), new StackTrace());
    }
    container.zeroVectors();
    valueCount = def.getRecordCount();
    // With no schema from an earlier load, the result is a schema change by definition.
    boolean schemaChanged = schema == null;

    // Index the vectors of the previous load by field path. Entries are removed as
    // fields are matched below; whatever remains belongs to fields that no longer exist.
    final Map<String, ValueVector> previousByPath = Maps.newHashMap();
    for (final VectorWrapper<?> existing : container) {
        final ValueVector existingVector = existing.getValueVector();
        previousByPath.put(existingVector.getField().getPath(), existingVector);
    }

    final VectorContainer loaded = new VectorContainer();
    try {
        int offset = 0;
        for (final SerializedField serialized : def.getFieldList()) {
            final MaterializedField fieldDef = MaterializedField.create(serialized);
            final ValueVector previous = previousByPath.remove(fieldDef.getPath());
            final boolean typeChanged = previous != null && !previous.getField().getType().equals(fieldDef.getType());
            if (typeChanged) {
                // Same path but a different type: the old vector cannot be reused.
                previous.clear();
            }

            final ValueVector target;
            if (previous == null || typeChanged) {
                // New field, or field with a changed type--is schema change.
                schemaChanged = true;
                target = TypeHelper.getNewVector(fieldDef, allocator);
            } else {
                target = previous;
            }

            // Fill the vector from its slice of the batch buffer, or allocate it empty.
            if (serialized.getValueCount() != 0) {
                target.load(serialized, buf.slice(offset, serialized.getBufferLength()));
            } else {
                AllocationHelper.allocate(target, 0, 0, 0);
            }
            offset += serialized.getBufferLength();
            loaded.add(target);
        }

        // Rebuild the schema from the vectors just loaded.
        final SchemaBuilder schemaBuilder = BatchSchema.newBuilder();
        for (final VectorWrapper<?> loadedWrapper : loaded) {
            schemaBuilder.addField(loadedWrapper.getField());
        }
        schemaBuilder.setSelectionVectorMode(BatchSchema.SelectionVectorMode.NONE);
        schema = schemaBuilder.build();
        loaded.buildSchema(BatchSchema.SelectionVectorMode.NONE);
        container = loaded;
    } catch (final Throwable cause) {
        // Clear the vectors collected so far and rethrow the failure unchanged so the
        // caller can run its own clean-up.
        for (final VectorWrapper<?> partial : loaded) {
            partial.getValueVector().clear();
        }
        throw cause;
    } finally {
        // Vectors still indexed here were not matched by any field of this batch.
        if (!previousByPath.isEmpty()) {
            schemaChanged = true;
            for (final ValueVector orphan : previousByPath.values()) {
                orphan.clear();
            }
        }
    }
    return schemaChanged;
}
Also used : StackTrace(org.apache.drill.common.StackTrace) ValueVector(org.apache.drill.exec.vector.ValueVector) SerializedField(org.apache.drill.exec.proto.UserBitShared.SerializedField)

Aggregations

ValueVector (org.apache.drill.exec.vector.ValueVector)130 MaterializedField (org.apache.drill.exec.record.MaterializedField)29 Test (org.junit.Test)21 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)18 RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader)13 VectorWrapper (org.apache.drill.exec.record.VectorWrapper)13 QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch)12 ExecTest (org.apache.drill.exec.ExecTest)11 IOException (java.io.IOException)10 LogicalExpression (org.apache.drill.common.expression.LogicalExpression)10 VectorContainer (org.apache.drill.exec.record.VectorContainer)10 Drillbit (org.apache.drill.exec.server.Drillbit)10 ErrorCollector (org.apache.drill.common.expression.ErrorCollector)9 ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl)9 RemoteServiceSet (org.apache.drill.exec.server.RemoteServiceSet)9 SchemaPath (org.apache.drill.common.expression.SchemaPath)8 MajorType (org.apache.drill.common.types.TypeProtos.MajorType)8 FunctionImplementationRegistry (org.apache.drill.exec.expr.fn.FunctionImplementationRegistry)8 FragmentContext (org.apache.drill.exec.ops.FragmentContext)8 PhysicalPlan (org.apache.drill.exec.physical.PhysicalPlan)8