
Example 6 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class SingleBatchSorterTemplate method sort.

@Override
public void sort(SelectionVector2 vector2) {
    QuickSort qs = new QuickSort();
    Stopwatch watch = Stopwatch.createStarted();
    if (vector2.getCount() > 0) {
        qs.sort(this, 0, vector2.getCount());
    }
    logger.debug("Took {} us to sort {} records", watch.elapsed(TimeUnit.MICROSECONDS), vector2.getCount());
}
Also used : QuickSort(org.apache.hadoop.util.QuickSort) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch)
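
This is the simplest Stopwatch pattern in Drill: create a started watch, run the work, then log the elapsed time without ever stopping the watch (elapsed() can be read while the watch is still running). A minimal, self-contained sketch of the same pattern, with an illustrative class and logger that are not part of the Drill source:

import java.util.concurrent.TimeUnit;
import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class TimedWork {
    private static final Logger logger = LoggerFactory.getLogger(TimedWork.class);

    // Runs the given work and logs how long it took, mirroring the sort() example above.
    public void runTimed(Runnable work) {
        Stopwatch watch = Stopwatch.createStarted();
        work.run();
        logger.debug("Took {} us to run work", watch.elapsed(TimeUnit.MICROSECONDS));
    }
}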

Example 7 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class SpilledRun method readBatch.

private void readBatch() {
    try {
        if (inputStream == null) {
            inputStream = spillSet.openForInput(path);
            reader = VectorSerializer.reader(allocator, inputStream);
        }
        Stopwatch watch = Stopwatch.createStarted();
        long start = allocator.getAllocatedMemory();
        VectorContainer c = reader.read();
        long end = allocator.getAllocatedMemory();
        logger.trace("Read {} records in {} us; size = {}, memory = {}", c.getRecordCount(), watch.elapsed(TimeUnit.MICROSECONDS), (end - start), end);
        if (schema != null) {
            c = SchemaUtil.coerceContainer(c, schema, allocator);
        }
        spilledBatches--;
        currentContainer.zeroVectors();
        Iterator<VectorWrapper<?>> wrapperIterator = c.iterator();
        for (VectorWrapper<?> w : currentContainer) {
            TransferPair pair = wrapperIterator.next().getValueVector().makeTransferPair(w.getValueVector());
            pair.transfer();
        }
        currentContainer.setRecordCount(c.getRecordCount());
        c.zeroVectors();
    } catch (IOException e) {
        // Release any partially-loaded data.
        currentContainer.clear();
        throw UserException.dataReadError(e).message("Failure while reading spilled data").build(logger);
    }
}
Also used : TransferPair(org.apache.drill.exec.record.TransferPair) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) IOException(java.io.IOException) VectorContainer(org.apache.drill.exec.record.VectorContainer)
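
readBatch() creates a fresh Stopwatch on every call and pairs the elapsed time with an allocator memory delta. If the same timing were needed inside a loop, a single watch could be reused with reset(); a sketch under that assumption, where batchCount and readOneBatch() are hypothetical placeholders and the imports match the earlier sketch:

// Sketch only: reuse one Stopwatch across several batch reads instead of creating a new one per call.
Stopwatch watch = Stopwatch.createUnstarted();
for (int i = 0; i < batchCount; i++) {
    watch.reset().start();
    readOneBatch();  // hypothetical per-batch work, standing in for reader.read() above
    logger.trace("Batch {} read in {} us", i, watch.elapsed(TimeUnit.MICROSECONDS));
}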

Example 8 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class FilePushDownFilter method doOnMatch.

protected void doOnMatch(RelOptRuleCall call, FilterPrel filter, ProjectPrel project, ScanPrel scan) {
    AbstractGroupScanWithMetadata<?> groupScan = (AbstractGroupScanWithMetadata<?>) scan.getGroupScan();
    if (groupScan.getFilter() != null && !groupScan.getFilter().equals(ValueExpressions.BooleanExpression.TRUE)) {
        return;
    }
    RexNode condition;
    if (project == null) {
        condition = filter.getCondition();
    } else {
        // get the filter as if it were below the projection.
        condition = RelOptUtil.pushPastProject(filter.getCondition(), project);
    }
    if (condition == null || condition.isAlwaysTrue()) {
        return;
    }
    // Get the conjunctions of the filter condition. A conjunction that refers to an ITEM or FLATTEN
    // expression cannot be pushed down; otherwise it is qualified to be pushed down.
    // The CNF conversion is limited to 100 nodes to avoid exponential growth
    // of the node count and a possible OOM.
    final List<RexNode> predList = RelOptUtil.conjunctions(RexUtil.toCnf(filter.getCluster().getRexBuilder(), 100, condition));
    final List<RexNode> qualifiedPredList = new ArrayList<>();
    // list of predicates which cannot be converted to filter predicate
    List<RexNode> nonConvertedPredList = new ArrayList<>();
    for (RexNode pred : predList) {
        if (DrillRelOptUtil.findOperators(pred, Collections.emptyList(), BANNED_OPERATORS) == null) {
            LogicalExpression drillPredicate = DrillOptiq.toDrill(new DrillParseContext(PrelUtil.getPlannerSettings(call.getPlanner())), scan, pred);
            // checks whether predicate may be used for filter pushdown
            FilterPredicate<?> filterPredicate = groupScan.getFilterPredicate(drillPredicate, optimizerContext, optimizerContext.getFunctionRegistry(), optimizerContext.getPlannerSettings().getOptions(), false);
            // if the predicate cannot be converted, collect it so a new filter can be built from it later
            if (filterPredicate == null) {
                nonConvertedPredList.add(pred);
            }
            qualifiedPredList.add(pred);
        } else {
            nonConvertedPredList.add(pred);
        }
    }
    final RexNode qualifiedPred = RexUtil.composeConjunction(filter.getCluster().getRexBuilder(), qualifiedPredList, true);
    if (qualifiedPred == null) {
        return;
    }
    LogicalExpression conditionExp = DrillOptiq.toDrill(new DrillParseContext(PrelUtil.getPlannerSettings(call.getPlanner())), scan, qualifiedPred);
    // By default, pass the original filter expr to (potentially) be used at run-time;
    // it may later be removed or replaced with another filter (see below).
    groupScan.setFilterForRuntime(conditionExp, optimizerContext);
    Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    AbstractGroupScanWithMetadata<?> newGroupScan = groupScan.applyFilter(conditionExp, optimizerContext, optimizerContext.getFunctionRegistry(), optimizerContext.getPlannerSettings().getOptions());
    if (timer != null) {
        logger.debug("Took {} ms to apply filter. ", timer.elapsed(TimeUnit.MILLISECONDS));
        timer.stop();
    }
    // When applyFilter() returns null, the existing metadata may still fully match the filter
    // even though row group pruning did not happen.
    if (newGroupScan == null) {
        if (groupScan.isMatchAllMetadata()) {
            RelNode child = project == null ? scan : project;
            // If all metadata fully matches the filter but row group pruning did not happen, remove the filter.
            if (nonConvertedPredList.isEmpty()) {
                // disable the original filter expr (i.e. don't use it at run-time)
                groupScan.setFilterForRuntime(null, optimizerContext);
                call.transformTo(child);
            } else if (nonConvertedPredList.size() == predList.size()) {
                // None of the predicates participated in filter pushdown.
                return;
            } else {
                // If some of the predicates weren't used in the pushdown, create a new filter with them
                // on top of the current scan. The case where none of the predicates were used is handled above.
                Filter theNewFilter = filter.copy(filter.getTraitSet(), child, RexUtil.composeConjunction(filter.getCluster().getRexBuilder(), nonConvertedPredList, true));
                LogicalExpression filterPredicate = DrillOptiq.toDrill(new DrillParseContext(PrelUtil.getPlannerSettings(call.getPlanner())), scan, theNewFilter.getCondition());
                // pass the new filter expr to (potentially) be used at run-time
                groupScan.setFilterForRuntime(filterPredicate, optimizerContext);
                // Replace the child with the new filter on top of the child/scan
                call.transformTo(theNewFilter);
            }
        }
        return;
    }
    RelNode newNode = new ScanPrel(scan.getCluster(), scan.getTraitSet(), newGroupScan, scan.getRowType(), scan.getTable());
    if (project != null) {
        newNode = project.copy(project.getTraitSet(), Collections.singletonList(newNode));
    }
    if (newGroupScan.isMatchAllMetadata()) {
        // create a filter from the expressions which can't be pushed down to the scan
        if (!nonConvertedPredList.isEmpty()) {
            Filter theFilterRel = filter.copy(filter.getTraitSet(), newNode, RexUtil.composeConjunction(filter.getCluster().getRexBuilder(), nonConvertedPredList, true));
            LogicalExpression filterPredicate = DrillOptiq.toDrill(new DrillParseContext(PrelUtil.getPlannerSettings(call.getPlanner())), scan, theFilterRel.getCondition());
            // pass the new filter expr to (potentially) be used at run-time
            newGroupScan.setFilterForRuntime(filterPredicate, optimizerContext);
            // wrap the new node in the new filter and use that as the result
            newNode = theFilterRel;
        }
        call.transformTo(newNode);
        return;
    }
    final RelNode newFilter = filter.copy(filter.getTraitSet(), Collections.singletonList(newNode));
    call.transformTo(newFilter);
}
Also used : LogicalExpression(org.apache.drill.common.expression.LogicalExpression) ScanPrel(org.apache.drill.exec.planner.physical.ScanPrel) RelNode(org.apache.calcite.rel.RelNode) Filter(org.apache.calcite.rel.core.Filter) ArrayList(java.util.ArrayList) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) DrillParseContext(org.apache.drill.exec.planner.logical.DrillParseContext) AbstractGroupScanWithMetadata(org.apache.drill.exec.physical.base.AbstractGroupScanWithMetadata) RexNode(org.apache.calcite.rex.RexNode)
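
Example 8 only creates the Stopwatch when debug logging is enabled, so planning pays no timing cost otherwise. The same guard can be factored into a small helper; timeIfDebug below is an illustrative name, not a Drill utility, and it assumes the slf4j and Stopwatch imports from the first sketch:

// Illustrative helper that wraps the debug-only timing guard used in doOnMatch() above.
static <T> T timeIfDebug(Logger logger, String label, java.util.function.Supplier<T> step) {
    Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    T result = step.get();
    if (timer != null) {
        logger.debug("Took {} ms to {}", timer.elapsed(TimeUnit.MILLISECONDS), label);
        timer.stop();
    }
    return result;
}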

Example 9 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class ParquetFormatPlugin method readStatistics.

@Override
public TableStatistics readStatistics(FileSystem fs, Path statsTablePath) throws IOException {
    Stopwatch timer = Stopwatch.createStarted();
    ObjectMapper mapper = DrillStatsTable.getMapper();
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
    FSDataInputStream is = fs.open(statsTablePath);
    TableStatistics statistics = mapper.readValue((InputStream) is, TableStatistics.class);
    logger.info("Took {} ms to read statistics from {} format plugin", timer.elapsed(TimeUnit.MILLISECONDS), name);
    timer.stop();
    return statistics;
}
Also used : Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) TableStatistics(org.apache.drill.exec.planner.common.DrillStatsTable.TableStatistics) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)
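
readStatistics() opens an FSDataInputStream but never closes it. A hedged variant of the same read, using only the calls already shown above and changing nothing except the resource handling:

@Override
public TableStatistics readStatistics(FileSystem fs, Path statsTablePath) throws IOException {
    Stopwatch timer = Stopwatch.createStarted();
    ObjectMapper mapper = DrillStatsTable.getMapper();
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
    TableStatistics statistics;
    // Closing the stream via try-with-resources is the only change from the original method.
    try (FSDataInputStream is = fs.open(statsTablePath)) {
        statistics = mapper.readValue((InputStream) is, TableStatistics.class);
    }
    logger.info("Took {} ms to read statistics from {} format plugin", timer.elapsed(TimeUnit.MILLISECONDS), name);
    timer.stop();
    return statistics;
}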

Example 10 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class AsyncPageReader method decompressPageV2.

/**
 * Reads a compressed v2 data page whose repetition and definition level sections
 * were excluded from compression.
 * @return decompressed Parquet page data
 * @throws IOException if the page data cannot be read or decompressed
 */
protected DrillBuf decompressPageV2(ReadStatus readStatus) throws IOException {
    Stopwatch timer = Stopwatch.createUnstarted();
    PageHeader pageHeader = readStatus.getPageHeader();
    int inputSize = pageHeader.getCompressed_page_size();
    int repLevelSize = pageHeader.data_page_header_v2.getRepetition_levels_byte_length();
    int defLevelSize = pageHeader.data_page_header_v2.getDefinition_levels_byte_length();
    int compDataOffset = repLevelSize + defLevelSize;
    int outputSize = pageHeader.uncompressed_page_size;
    // TODO: does reporting this number have the same meaning in an async context?
    long start = dataReader.getPos();
    long timeToRead;
    DrillBuf inputPageData = readStatus.getPageData();
    DrillBuf outputPageData = this.allocator.buffer(outputSize);
    try {
        timer.start();
        // Write out the uncompressed section
        // Note that the following setBytes call to read the repetition and definition level sections
        // advances readerIndex in inputPageData but not writerIndex in outputPageData.
        outputPageData.setBytes(0, inputPageData, compDataOffset);
        // decompress from the start of compressed data to the end of the input buffer
        CompressionCodecName codecName = columnChunkMetaData.getCodec();
        CompressionCodecFactory.BytesInputDecompressor decomp = codecFactory.getDecompressor(codecName);
        ByteBuffer input = inputPageData.nioBuffer(compDataOffset, inputSize - compDataOffset);
        ByteBuffer output = outputPageData.nioBuffer(compDataOffset, outputSize - compDataOffset);
        decomp.decompress(input, inputSize - compDataOffset, output, outputSize - compDataOffset);
        outputPageData.writerIndex(outputSize);
        timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);
        if (logger.isTraceEnabled()) {
            logger.trace("Col: {}  readPos: {}  Uncompressed_size: {}  pageData: {}", columnChunkMetaData.toString(), // TODO: see comment on earlier call to getPos()
            dataReader.getPos(), outputSize, ByteBufUtil.hexDump(outputPageData));
        }
        this.updateStats(pageHeader, "Decompress", start, timeToRead, inputSize, outputSize);
    } finally {
        readStatus.setPageData(null);
        if (inputPageData != null) {
            inputPageData.release();
        }
    }
    return outputPageData;
}
Also used : CompressionCodecFactory(org.apache.parquet.compression.CompressionCodecFactory) PageHeader(org.apache.parquet.format.PageHeader) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) ByteBuffer(java.nio.ByteBuffer) DrillBuf(io.netty.buffer.DrillBuf)
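
Here the watch is created unstarted and only start()-ed after the buffers are allocated, so the nanosecond reading covers just the copy and decompression. A stripped-down sketch of that shape, where decompress() is a hypothetical stand-in for the setBytes and decompressor calls above:

// Sketch only: time a single region with an unstarted Stopwatch, as in decompressPageV2().
Stopwatch timer = Stopwatch.createUnstarted();
// ... allocate buffers and compute offsets (not timed) ...
timer.start();
decompress();  // hypothetical stand-in for the measured work
long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);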

Aggregations

Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) 68
IOException (java.io.IOException) 13
Path (org.apache.hadoop.fs.Path) 12
ArrayList (java.util.ArrayList) 8
DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException) 8
FileStatus (org.apache.hadoop.fs.FileStatus) 8
DrillBuf (io.netty.buffer.DrillBuf) 7
ByteBuffer (java.nio.ByteBuffer) 7
SchemaPath (org.apache.drill.common.expression.SchemaPath) 7
HashMap (java.util.HashMap) 5
RelNode (org.apache.calcite.rel.RelNode) 5
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException) 4
DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) 4
VectorContainer (org.apache.drill.exec.record.VectorContainer) 4
SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4) 4
ValueVector (org.apache.drill.exec.vector.ValueVector) 4
CompressionCodecName (org.apache.parquet.hadoop.metadata.CompressionCodecName) 4
File (java.io.File) 3
ResultSet (java.sql.ResultSet) 3
ResultSetMetaData (java.sql.ResultSetMetaData) 3