
Example 56 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class HiveMetadataProvider method getStats.

/**
 * Return stats for table/partitions in given {@link HiveReadEntry}.
 * If valid stats are available in the MetaStore, return them.
 * Otherwise estimate using the size of the input data.
 *
 * @param hiveReadEntry Subset of the {@link HiveReadEntry} used when creating this cache object.
 * @return hive statistics holder
 * @throws IOException if unable to retrieve table statistics
 */
public HiveStats getStats(final HiveReadEntry hiveReadEntry) throws IOException {
    Stopwatch timeGetStats = Stopwatch.createStarted();
    HiveTableWithColumnCache table = hiveReadEntry.getTable();
    try {
        if (!isPartitionedTable) {
            Properties properties = new Table(table).getMetadata();
            HiveStats stats = HiveStats.getStatsFromProps(properties);
            if (stats.valid()) {
                return stats;
            }
            return stats.getSizeInBytes() > 0 ? estimateStatsFromBytes(stats.getSizeInBytes()) : estimateStatsFromInputSplits(getTableInputSplits());
        } else {
            HiveStats aggStats = new HiveStats(0, 0);
            for (HivePartition partition : hiveReadEntry.getPartitions()) {
                Properties properties = HiveUtilities.getPartitionMetadata(partition, table);
                HiveStats stats = HiveStats.getStatsFromProps(properties);
                if (!stats.valid()) {
                    stats = stats.getSizeInBytes() > 0 ? estimateStatsFromBytes(stats.getSizeInBytes()) : estimateStatsFromInputSplits(getPartitionInputSplits(partition));
                }
                aggStats.add(stats);
            }
            return aggStats;
        }
    } catch (Exception e) {
        throw new IOException("Failed to get number of rows and total size from HiveTable", e);
    } finally {
        logger.debug("Took {} µs to get stats from {}.{}", timeGetStats.elapsed(TimeUnit.NANOSECONDS) / 1000, table.getDbName(), table.getTableName());
    }
}
Also used : Table(org.apache.hadoop.hive.ql.metadata.Table) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) IOException(java.io.IOException) Properties(java.util.Properties) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException)
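
The pattern above, starting a Stopwatch on entry and logging elapsed time in a finally block, reports the timing on both the success and the exception paths. Below is a minimal self-contained sketch of that pattern; the TimedLookup class and its loadStats method are hypothetical stand-ins, and System.out replaces Drill's slf4j logger.

import java.util.concurrent.TimeUnit;
import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;

public class TimedLookup {
    public String loadStats(String tableName) {
        Stopwatch watch = Stopwatch.createStarted();
        try {
            // stand-in for the real work, e.g. querying the metastore
            return "stats for " + tableName;
        } finally {
            // elapsed(TimeUnit.MICROSECONDS) avoids the manual nanos / 1000 division used above
            System.out.printf("Took %d us to get stats for %s%n",
                watch.elapsed(TimeUnit.MICROSECONDS), tableName);
        }
    }

    public static void main(String[] args) {
        System.out.println(new TimedLookup().loadStats("employees"));
    }
}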

Example 57 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class QuerySubmitter method submitQuery.

public int submitQuery(DrillClient client, String plan, String type, String format, int width) throws Exception {
    String[] queries;
    QueryType queryType;
    type = type.toLowerCase();
    switch(type) {
        case "sql":
            queryType = QueryType.SQL;
            queries = plan.trim().split(";");
            break;
        case "logical":
            queryType = QueryType.LOGICAL;
            queries = new String[] { plan };
            break;
        case "physical":
            queryType = QueryType.PHYSICAL;
            queries = new String[] { plan };
            break;
        default:
            System.out.println("Invalid query type: " + type);
            return -1;
    }
    Format outputFormat;
    format = format.toLowerCase();
    switch(format) {
        case "csv":
            outputFormat = Format.CSV;
            break;
        case "tsv":
            outputFormat = Format.TSV;
            break;
        case "table":
            outputFormat = Format.TABLE;
            break;
        default:
            System.out.println("Invalid format type: " + format);
            return -1;
    }
    Stopwatch watch = Stopwatch.createUnstarted();
    for (String query : queries) {
        AwaitableUserResultsListener listener = new AwaitableUserResultsListener(new LoggingResultsListener(client.getConfig(), outputFormat, width));
        watch.start();
        client.runQuery(queryType, query, listener);
        int rows = listener.await();
        System.out.println(String.format("%d record%s selected (%f seconds)", rows, rows != 1 ? "s" : "", (float) watch.elapsed(TimeUnit.MILLISECONDS) / (float) 1000));
        if (query != queries[queries.length - 1]) {
            System.out.println();
        }
        watch.stop();
        watch.reset();
    }
    return 0;
}
Also used : AwaitableUserResultsListener(org.apache.drill.exec.rpc.user.AwaitableUserResultsListener) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) QueryType(org.apache.drill.exec.proto.UserBitShared.QueryType)
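
submitQuery reuses a single unstarted Stopwatch across queries: start it before each run, read the elapsed time, then stop and reset it so the next iteration starts from zero. A minimal sketch of that reuse pattern, with Thread.sleep standing in for the Drill client call and the listener await:

import java.util.concurrent.TimeUnit;
import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;

public class PerQueryTiming {
    public static void main(String[] args) throws InterruptedException {
        String[] queries = { "q1", "q2", "q3" };
        Stopwatch watch = Stopwatch.createUnstarted();
        for (String query : queries) {
            watch.start();
            Thread.sleep(10);  // stand-in for client.runQuery(...) and listener.await()
            System.out.printf("%s took %.3f seconds%n", query,
                watch.elapsed(TimeUnit.MILLISECONDS) / 1000.0);
            watch.stop();
            watch.reset();  // clears the accumulated time so elapsed() starts from zero next pass
        }
    }
}

An alternative with the same effect is a fresh Stopwatch.createStarted() per iteration, as the next example does.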

Example 58 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class PriorityQueueTemplate method add.

@Override
public void add(RecordBatchData batch) throws SchemaChangeException {
    Stopwatch watch = Stopwatch.createStarted();
    if (hyperBatch == null) {
        hyperBatch = new ExpandableHyperContainer(batch.getContainer());
    } else {
        hyperBatch.addBatch(batch.getContainer());
    }
    // may not need to do this every time
    doSetup(hyperBatch, null);
    int count = 0;
    SelectionVector2 sv2 = null;
    if (hasSv2) {
        sv2 = batch.getSv2();
    }
    // The heap fills until queueSize reaches the limit; the records may arrive in
    // one or many batches. For each new record, siftUp (or heapify) is called to restore the min-heap property.
    for (; queueSize < limit && count < batch.getRecordCount(); count++) {
        heapSv4.set(queueSize, batchCount, hasSv2 ? sv2.getIndex(count) : count);
        queueSize++;
        siftUp();
    }
    // Once the heap holds limit records, each remaining record is written into the scratch slot at index 'limit'
    // and compared against the current root; a winner is swapped in and siftDown restores the heap. Note that the
    // hyperbatch still keeps all the records unless purge is called.
    for (; count < batch.getRecordCount(); count++) {
        heapSv4.set(limit, batchCount, hasSv2 ? sv2.getIndex(count) : count);
        if (compare(limit, 0) < 0) {
            swap(limit, 0);
            siftDown();
        }
    }
    batchCount++;
    if (hasSv2) {
        sv2.clear();
    }
    logger.debug("Took {} us to add {} records", watch.elapsed(TimeUnit.MICROSECONDS), count);
}
Also used : ExpandableHyperContainer(org.apache.drill.exec.record.ExpandableHyperContainer) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) SelectionVector2(org.apache.drill.exec.record.selection.SelectionVector2)
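
The two loops above implement a bounded min-heap over record indexes: while the queue is below the limit each record is appended and sifted up, and once it is full a candidate is first written to the scratch slot at index limit and only swapped in if it beats the root, followed by a siftDown. A plain int-array sketch of that bounded-heap logic, stripped of Drill's SV4, scratch slot, and hyperbatch machinery (here keeping the N largest values; Drill's comparison direction comes from generated compare code):

public class BoundedTopN {
    private final int[] heap;   // min-heap of the kept values; heap[0] is the smallest kept
    private final int limit;
    private int size;

    public BoundedTopN(int limit) {
        this.limit = limit;
        this.heap = new int[limit];
    }

    public void add(int value) {
        if (size < limit) {
            heap[size++] = value;       // phase one: fill the heap and sift up
            siftUp(size - 1);
        } else if (value > heap[0]) {   // phase two: a candidate must beat the root to enter
            heap[0] = value;
            siftDown(0);
        }
    }

    private void siftUp(int i) {
        while (i > 0 && heap[i] < heap[(i - 1) / 2]) {
            swap(i, (i - 1) / 2);
            i = (i - 1) / 2;
        }
    }

    private void siftDown(int i) {
        while (true) {
            int smallest = i, l = 2 * i + 1, r = 2 * i + 2;
            if (l < size && heap[l] < heap[smallest]) smallest = l;
            if (r < size && heap[r] < heap[smallest]) smallest = r;
            if (smallest == i) return;
            swap(i, smallest);
            i = smallest;
        }
    }

    private void swap(int a, int b) { int t = heap[a]; heap[a] = heap[b]; heap[b] = t; }
}

Feeding 5, 1, 9, 7, 3 into new BoundedTopN(3) leaves 5, 7, 9 in the heap, with 5 at the root.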

Example 59 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class TopNBatch method innerNext.

@Override
public IterOutcome innerNext() {
    recordCount = 0;
    if (state == BatchState.DONE) {
        return NONE;
    }
    // Check if anything is remaining from previous record boundary
    if (hasOutputRecords) {
        return handleRemainingOutput();
    }
    // Reset the TopN state for next iteration
    resetTopNState();
    boolean incomingHasSv2 = false;
    switch(incoming.getSchema().getSelectionVectorMode()) {
        case NONE:
            {
                break;
            }
        case TWO_BYTE:
            {
                incomingHasSv2 = true;
                break;
            }
        case FOUR_BYTE:
            {
                throw UserException.internalError(null).message("TopN doesn't support incoming with SV4 mode").build(logger);
            }
        default:
            throw new UnsupportedOperationException("Unsupported SV mode detected in TopN incoming batch");
    }
    outer: while (true) {
        Stopwatch watch = Stopwatch.createStarted();
        if (first) {
            lastKnownOutcome = IterOutcome.OK_NEW_SCHEMA;
            // Create the SV4 object upfront to be used for both empty and non-empty incoming batches at EMIT boundary
            sv4 = new SelectionVector4(context.getAllocator(), 0);
            first = false;
        } else {
            lastKnownOutcome = next(incoming);
        }
        if (lastKnownOutcome == OK && schema == null) {
            lastKnownOutcome = IterOutcome.OK_NEW_SCHEMA;
            container.clear();
        }
        logger.debug("Took {} us to get next", watch.elapsed(TimeUnit.MICROSECONDS));
        switch(lastKnownOutcome) {
            case NONE:
                break outer;
            case NOT_YET:
                throw new UnsupportedOperationException();
            case OK_NEW_SCHEMA:
                // Only change in the case that the schema truly changes; artificial schema changes are ignored.
                // Schema change handling when EMIT is also seen is the same as without EMIT, i.e. it is handled
                // only if union type is enabled.
                container.clear();
                firstBatchForSchema = true;
                if (!incoming.getSchema().equals(schema)) {
                    if (schema != null) {
                        if (!unionTypeEnabled) {
                            throw new UnsupportedOperationException(String.format("TopN currently doesn't support changing " + "schemas with union type disabled. Please try enabling union type: %s and re-execute the query", ExecConstants.ENABLE_UNION_TYPE_KEY));
                        } else {
                            schema = SchemaUtil.mergeSchemas(this.schema, incoming.getSchema());
                            purgeAndResetPriorityQueue();
                            schemaChanged = true;
                        }
                    } else {
                        schema = incoming.getSchema();
                    }
                }
            // fall through.
            case OK:
            case EMIT:
                if (incoming.getRecordCount() == 0) {
                    for (VectorWrapper<?> w : incoming) {
                        w.clear();
                    }
                    // Release memory for incoming SV2 vector
                    if (incomingHasSv2) {
                        incoming.getSelectionVector2().clear();
                    }
                    break;
                }
                countSincePurge += incoming.getRecordCount();
                batchCount++;
                RecordBatchData batch;
                if (schemaChanged) {
                    batch = new RecordBatchData(SchemaUtil.coerceContainer(incoming, this.schema, oContext), oContext.getAllocator());
                } else {
                    batch = new RecordBatchData(incoming, oContext.getAllocator());
                }
                boolean success = false;
                try {
                    if (priorityQueue == null) {
                        priorityQueue = createNewPriorityQueue(new ExpandableHyperContainer(batch.getContainer()), config.getLimit());
                    } else if (!priorityQueue.isInitialized()) {
                        // means priority queue is cleaned up after producing output for first record boundary. We should
                        // initialize it for next record boundary
                        priorityQueue.init(config.getLimit(), oContext.getAllocator(), schema.getSelectionVectorMode() == SelectionVectorMode.TWO_BYTE);
                    }
                    priorityQueue.add(batch);
                    // Past a static threshold of batch count, purge the accumulated RecordBatches whose records
                    // no longer fall under the TopN category
                    if (countSincePurge > config.getLimit() && batchCount > batchPurgeThreshold) {
                        purge();
                        countSincePurge = 0;
                        batchCount = 0;
                    }
                    success = true;
                } catch (SchemaChangeException e) {
                    throw schemaChangeException(e, logger);
                } finally {
                    if (!success) {
                        batch.clear();
                    }
                }
                break;
            default:
                throw new UnsupportedOperationException();
        }
        // Break out of the loop on EMIT; both the empty-batch and
        // with-records EMIT outcome cases were already handled in the case statements above
        if (lastKnownOutcome == EMIT) {
            break;
        }
    }
    // The PriorityQueue can be uninitialized here if only empty batches were received between two EMIT outcomes.
    if (schema == null || (priorityQueue == null || !priorityQueue.isInitialized())) {
        // builder may be null at this point if the first incoming batch is empty
        return handleEmptyBatches(lastKnownOutcome);
    }
    priorityQueue.generate();
    prepareOutputContainer(priorityQueue.getHyperBatch(), priorityQueue.getFinalSv4());
    // The final outcome (OK_NEW_SCHEMA, OK, or EMIT) is derived from lastKnownOutcome.
    return getFinalOutcome();
}
Also used : ExpandableHyperContainer(org.apache.drill.exec.record.ExpandableHyperContainer) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) RecordBatchData(org.apache.drill.exec.physical.impl.sort.RecordBatchData) HyperVectorWrapper(org.apache.drill.exec.record.HyperVectorWrapper) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)
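
Unlike the previous examples, innerNext creates a fresh started Stopwatch on every pass of the loop, so each debug line reports only that iteration's cost of fetching the next batch rather than a running total. A minimal sketch of that per-iteration pattern, with Thread.sleep standing in for next(incoming):

import java.util.concurrent.TimeUnit;
import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;

public class PerIterationTiming {
    public static void main(String[] args) throws InterruptedException {
        for (int i = 0; i < 3; i++) {
            Stopwatch watch = Stopwatch.createStarted();  // a new watch isolates this iteration's cost
            Thread.sleep(5);  // stand-in for next(incoming)
            System.out.printf("Took %d us to get next%n", watch.elapsed(TimeUnit.MICROSECONDS));
        }
    }
}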

Example 60 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class TopNBatch method purge.

/**
 * While the PriorityQueue is built up, it stores the limit number of
 * record indexes (in heapSv4) that fall under the TopN category, but it
 * also keeps every incoming RecordBatch, with all of its records, inside a
 * HyperContainer (hyperBatch). When a certain threshold of batches is
 * reached, this method copies the limit number of records whose indexes
 * are stored in heapSv4 out of the HyperBatch into a new VectorContainer,
 * releasing all other records and their batches. The new VectorContainer
 * is then stored inside the HyperBatch, and its corresponding indexes are
 * stored in the heapSv4 vector. This avoids holding on to lots of
 * RecordBatches, which could otherwise cause an OutOfMemory condition.
 */
private void purge() {
    Stopwatch watch = Stopwatch.createStarted();
    VectorContainer c = priorityQueue.getHyperBatch();
    // Simple VectorContainer which stores only the limit number of records. Only the records whose indexes are
    // stored inside selectionVector4 below are copied from the Hyper container into this simple container.
    VectorContainer newContainer = new VectorContainer(oContext);
    // SV4 storing the limit number of indexes
    SelectionVector4 selectionVector4 = priorityQueue.getSv4();
    SimpleSV4RecordBatch batch = new SimpleSV4RecordBatch(c, selectionVector4, context);
    if (copier == null) {
        copier = GenericCopierFactory.createAndSetupCopier(batch, newContainer, null);
    } else {
        for (VectorWrapper<?> i : batch) {
            ValueVector v = TypeHelper.getNewVector(i.getField(), oContext.getAllocator());
            newContainer.add(v);
        }
        copier.setup(batch, newContainer);
    }
    SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
    try {
        // Purge all the existing batches to a new batch which only holds the selected records
        copyToPurge(newContainer, builder);
        // New VectorContainer that contains only limit number of records and is later passed to resetQueue to create a
        // HyperContainer backing the priority queue out of it
        VectorContainer newQueue = new VectorContainer();
        builder.build(newQueue);
        try {
            priorityQueue.resetQueue(newQueue, builder.getSv4().createNewWrapperCurrent());
        } catch (SchemaChangeException e) {
            throw schemaChangeException(e, logger);
        }
        builder.getSv4().clear();
    } finally {
        DrillAutoCloseables.closeNoChecked(builder);
    }
    logger.debug("Took {} us to purge", watch.elapsed(TimeUnit.MICROSECONDS));
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) VectorContainer(org.apache.drill.exec.record.VectorContainer) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)
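
Conceptually, purge is a compaction: copy only the records whose indexes survive in the SV4 into a fresh container, release everything else, and time the whole operation. A hypothetical list-based sketch of that compact-and-release idea, without Drill's vector transfer machinery; keepIndexes plays the role of heapSv4:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;

public class CompactAndRelease {
    static List<String> purge(List<String> all, int[] keepIndexes) {
        Stopwatch watch = Stopwatch.createStarted();
        List<String> survivors = new ArrayList<>(keepIndexes.length);
        for (int idx : keepIndexes) {
            survivors.add(all.get(idx));   // copy out only the surviving records
        }
        all.clear();  // drops the remaining references; Drill instead frees the vector buffers
        System.out.printf("Took %d us to purge%n", watch.elapsed(TimeUnit.MICROSECONDS));
        return survivors;
    }

    public static void main(String[] args) {
        List<String> batch = new ArrayList<>(List.of("a", "b", "c", "d"));
        System.out.println(purge(batch, new int[] { 2, 0 }));  // prints [c, a]
    }
}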

Aggregations

Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch): 68
IOException (java.io.IOException): 13
Path (org.apache.hadoop.fs.Path): 12
ArrayList (java.util.ArrayList): 8
DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException): 8
FileStatus (org.apache.hadoop.fs.FileStatus): 8
DrillBuf (io.netty.buffer.DrillBuf): 7
ByteBuffer (java.nio.ByteBuffer): 7
SchemaPath (org.apache.drill.common.expression.SchemaPath): 7
HashMap (java.util.HashMap): 5
RelNode (org.apache.calcite.rel.RelNode): 5
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 4
DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint): 4
VectorContainer (org.apache.drill.exec.record.VectorContainer): 4
SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4): 4
ValueVector (org.apache.drill.exec.vector.ValueVector): 4
CompressionCodecName (org.apache.parquet.hadoop.metadata.CompressionCodecName): 4
File (java.io.File): 3
ResultSet (java.sql.ResultSet): 3
ResultSetMetaData (java.sql.ResultSetMetaData): 3