use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.
the class HiveMetadataProvider method getStats.
/**
* Return stats for table/partitions in given {@link HiveReadEntry}.
* If valid stats are available in MetaStore, return it.
* Otherwise estimate using the size of the input data.
*
* @param hiveReadEntry Subset of the {@link HiveReadEntry} used when creating this cache object.
* @return hive statistics holder
* @throws IOException if table statistics could not be retrieved
*/
public HiveStats getStats(final HiveReadEntry hiveReadEntry) throws IOException {
Stopwatch timeGetStats = Stopwatch.createStarted();
HiveTableWithColumnCache table = hiveReadEntry.getTable();
try {
if (!isPartitionedTable) {
Properties properties = new Table(table).getMetadata();
HiveStats stats = HiveStats.getStatsFromProps(properties);
if (stats.valid()) {
return stats;
}
return stats.getSizeInBytes() > 0 ? estimateStatsFromBytes(stats.getSizeInBytes()) : estimateStatsFromInputSplits(getTableInputSplits());
} else {
HiveStats aggStats = new HiveStats(0, 0);
for (HivePartition partition : hiveReadEntry.getPartitions()) {
Properties properties = HiveUtilities.getPartitionMetadata(partition, table);
HiveStats stats = HiveStats.getStatsFromProps(properties);
if (!stats.valid()) {
stats = stats.getSizeInBytes() > 0 ? estimateStatsFromBytes(stats.getSizeInBytes()) : estimateStatsFromInputSplits(getPartitionInputSplits(partition));
}
aggStats.add(stats);
}
return aggStats;
}
} catch (Exception e) {
throw new IOException("Failed to get number of rows and total size from HiveTable", e);
} finally {
logger.debug("Took {} µs to get stats from {}.{}", timeGetStats.elapsed(TimeUnit.NANOSECONDS) / 1000, table.getDbName(), table.getTableName());
}
}
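The Stopwatch in getStats only brackets the metastore work so the elapsed time can be logged on the way out. A minimal, self-contained sketch of that same pattern, using the shaded Guava class; doWork() and the printed label are illustrative stand-ins, not Drill APIs:

import java.util.concurrent.TimeUnit;

import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;

public class StopwatchTimingSketch {

  public static void main(String[] args) {
    // createStarted() returns a Stopwatch that is already running
    Stopwatch watch = Stopwatch.createStarted();
    try {
      doWork();
    } finally {
      // same conversion the method above logs: nanoseconds / 1000 == microseconds
      System.out.printf("Took %d us to do work%n", watch.elapsed(TimeUnit.NANOSECONDS) / 1000);
    }
  }

  private static void doWork() {
    // hypothetical stand-in for the timed operation, e.g. a metastore lookup
  }
}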
use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.
the class QuerySubmitter method submitQuery.
public int submitQuery(DrillClient client, String plan, String type, String format, int width) throws Exception {
String[] queries;
QueryType queryType;
type = type.toLowerCase();
switch(type) {
case "sql":
queryType = QueryType.SQL;
queries = plan.trim().split(";");
break;
case "logical":
queryType = QueryType.LOGICAL;
queries = new String[] { plan };
break;
case "physical":
queryType = QueryType.PHYSICAL;
queries = new String[] { plan };
break;
default:
System.out.println("Invalid query type: " + type);
return -1;
}
Format outputFormat;
format = format.toLowerCase();
switch(format) {
case "csv":
outputFormat = Format.CSV;
break;
case "tsv":
outputFormat = Format.TSV;
break;
case "table":
outputFormat = Format.TABLE;
break;
default:
System.out.println("Invalid format type: " + format);
return -1;
}
Stopwatch watch = Stopwatch.createUnstarted();
for (String query : queries) {
AwaitableUserResultsListener listener = new AwaitableUserResultsListener(new LoggingResultsListener(client.getConfig(), outputFormat, width));
watch.start();
client.runQuery(queryType, query, listener);
int rows = listener.await();
System.out.println(String.format("%d record%s selected (%f seconds)", rows, rows > 1 ? "s" : "", (float) watch.elapsed(TimeUnit.MILLISECONDS) / (float) 1000));
if (query != queries[queries.length - 1]) {
System.out.println();
}
watch.stop();
watch.reset();
}
return 0;
}
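Unlike the previous example, submitQuery creates one unstarted Stopwatch and reuses it across queries: start() before each query, read elapsed() for the report, then stop() and reset() before the next iteration. A minimal sketch of that reuse cycle; runOneQuery() is a hypothetical placeholder for the client call:

import java.util.concurrent.TimeUnit;

import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;

public class StopwatchReuseSketch {

  public static void main(String[] args) {
    Stopwatch watch = Stopwatch.createUnstarted();  // not running yet
    String[] queries = { "SELECT 1", "SELECT 2" };
    for (String query : queries) {
      watch.start();
      runOneQuery(query);
      System.out.printf("%s took %f seconds%n", query, watch.elapsed(TimeUnit.MILLISECONDS) / 1000.0f);
      watch.stop();   // reset() alone would also zero and stop a running stopwatch
      watch.reset();
    }
  }

  private static void runOneQuery(String query) {
    // hypothetical placeholder for client.runQuery(...) plus waiting on the listener
  }
}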
use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.
the class PriorityQueueTemplate method add.
@Override
public void add(RecordBatchData batch) throws SchemaChangeException {
Stopwatch watch = Stopwatch.createStarted();
if (hyperBatch == null) {
hyperBatch = new ExpandableHyperContainer(batch.getContainer());
} else {
hyperBatch.addBatch(batch.getContainer());
}
// may not need to do this every time
doSetup(hyperBatch, null);
int count = 0;
SelectionVector2 sv2 = null;
if (hasSv2) {
sv2 = batch.getSv2();
}
// Fill the heap until it holds 'limit' records, which may take one or many batches. For each new record siftUp (or heapify) is called to restore the min-heap property.
for (; queueSize < limit && count < batch.getRecordCount(); count++) {
heapSv4.set(queueSize, batchCount, hasSv2 ? sv2.getIndex(count) : count);
queueSize++;
siftUp();
}
// Once the heap is full, each remaining record is compared against the heap root and, if it ranks ahead of it, swapped in with siftDown. Note that the underlying batches are still keeping all the records unless purge is called.
for (; count < batch.getRecordCount(); count++) {
heapSv4.set(limit, batchCount, hasSv2 ? sv2.getIndex(count) : count);
if (compare(limit, 0) < 0) {
swap(limit, 0);
siftDown();
}
}
batchCount++;
if (hasSv2) {
sv2.clear();
}
logger.debug("Took {} us to add {} records", watch.elapsed(TimeUnit.MICROSECONDS), count);
}
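This add() method is the insert half of a bounded heap: the first limit records are pushed with siftUp, and every later record is compared against the root and, if it wins, swapped in and restored with siftDown. A conceptual sketch of the same bounded top-N heap over plain int values; this illustrates the algorithm only and is not the generated Drill template:

import java.util.Arrays;

public class BoundedTopNSketch {

  private final int[] heap;   // min-heap holding the current top-N values
  private int size;

  BoundedTopNSketch(int limit) {
    this.heap = new int[limit];
  }

  void add(int value) {
    if (size < heap.length) {
      heap[size] = value;       // analogous to heapSv4.set(queueSize, ...)
      siftUp(size++);
    } else if (value > heap[0]) {
      heap[0] = value;          // analogous to the compare/swap against the root
      siftDown(0);
    }
  }

  private void siftUp(int i) {
    while (i > 0 && heap[i] < heap[(i - 1) / 2]) {
      swap(i, (i - 1) / 2);
      i = (i - 1) / 2;
    }
  }

  private void siftDown(int i) {
    while (true) {
      int smallest = i;
      int left = 2 * i + 1;
      int right = 2 * i + 2;
      if (left < size && heap[left] < heap[smallest]) {
        smallest = left;
      }
      if (right < size && heap[right] < heap[smallest]) {
        smallest = right;
      }
      if (smallest == i) {
        return;
      }
      swap(i, smallest);
      i = smallest;
    }
  }

  private void swap(int a, int b) {
    int tmp = heap[a];
    heap[a] = heap[b];
    heap[b] = tmp;
  }

  public static void main(String[] args) {
    BoundedTopNSketch topN = new BoundedTopNSketch(3);
    for (int v : new int[] { 5, 1, 9, 7, 3, 8 }) {
      topN.add(v);
    }
    System.out.println(Arrays.toString(topN.heap));  // prints [7, 8, 9]: the three largest values, in heap order
  }
}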
use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.
the class TopNBatch method innerNext.
@Override
public IterOutcome innerNext() {
recordCount = 0;
if (state == BatchState.DONE) {
return NONE;
}
// Check if anything is remaining from previous record boundary
if (hasOutputRecords) {
return handleRemainingOutput();
}
// Reset the TopN state for next iteration
resetTopNState();
boolean incomingHasSv2 = false;
switch(incoming.getSchema().getSelectionVectorMode()) {
case NONE:
{
break;
}
case TWO_BYTE:
{
incomingHasSv2 = true;
break;
}
case FOUR_BYTE:
{
throw UserException.internalError(null).message("TopN doesn't support incoming with SV4 mode").build(logger);
}
default:
throw new UnsupportedOperationException("Unsupported SV mode detected in TopN incoming batch");
}
outer: while (true) {
Stopwatch watch = Stopwatch.createStarted();
if (first) {
lastKnownOutcome = IterOutcome.OK_NEW_SCHEMA;
// Create the SV4 object upfront to be used for both empty and non-empty incoming batches at EMIT boundary
sv4 = new SelectionVector4(context.getAllocator(), 0);
first = false;
} else {
lastKnownOutcome = next(incoming);
}
if (lastKnownOutcome == OK && schema == null) {
lastKnownOutcome = IterOutcome.OK_NEW_SCHEMA;
container.clear();
}
logger.debug("Took {} us to get next", watch.elapsed(TimeUnit.MICROSECONDS));
switch(lastKnownOutcome) {
case NONE:
break outer;
case NOT_YET:
throw new UnsupportedOperationException();
case OK_NEW_SCHEMA:
// only change in the case that the schema truly changes. Artificial schema changes are ignored.
// Schema change handling when EMIT is also seen is the same as without EMIT, i.e. it is handled only if union type is enabled.
container.clear();
firstBatchForSchema = true;
if (!incoming.getSchema().equals(schema)) {
if (schema != null) {
if (!unionTypeEnabled) {
throw new UnsupportedOperationException(String.format("TopN currently doesn't support changing " + "schemas with union type disabled. Please try enabling union type: %s and re-execute the query", ExecConstants.ENABLE_UNION_TYPE_KEY));
} else {
schema = SchemaUtil.mergeSchemas(this.schema, incoming.getSchema());
purgeAndResetPriorityQueue();
schemaChanged = true;
}
} else {
schema = incoming.getSchema();
}
}
// fall through.
case OK:
case EMIT:
if (incoming.getRecordCount() == 0) {
for (VectorWrapper<?> w : incoming) {
w.clear();
}
// Release memory for incoming SV2 vector
if (incomingHasSv2) {
incoming.getSelectionVector2().clear();
}
break;
}
countSincePurge += incoming.getRecordCount();
batchCount++;
RecordBatchData batch;
if (schemaChanged) {
batch = new RecordBatchData(SchemaUtil.coerceContainer(incoming, this.schema, oContext), oContext.getAllocator());
} else {
batch = new RecordBatchData(incoming, oContext.getAllocator());
}
boolean success = false;
try {
if (priorityQueue == null) {
priorityQueue = createNewPriorityQueue(new ExpandableHyperContainer(batch.getContainer()), config.getLimit());
} else if (!priorityQueue.isInitialized()) {
// An uninitialized queue means it was cleaned up after producing output for the first record boundary, so initialize it for the next record boundary.
priorityQueue.init(config.getLimit(), oContext.getAllocator(), schema.getSelectionVectorMode() == SelectionVectorMode.TWO_BYTE);
}
priorityQueue.add(batch);
// Once enough records have accumulated and the batch threshold is exceeded, purge the RecordBatches which are of no use, i.e. whose records don't fall under the TopN category.
if (countSincePurge > config.getLimit() && batchCount > batchPurgeThreshold) {
purge();
countSincePurge = 0;
batchCount = 0;
}
success = true;
} catch (SchemaChangeException e) {
throw schemaChangeException(e, logger);
} finally {
if (!success) {
batch.clear();
}
}
break;
default:
throw new UnsupportedOperationException();
}
// Stop pulling more batches at an EMIT boundary; the EMIT batch itself was already processed along with records and EMIT outcome in the above case statements.
if (lastKnownOutcome == EMIT) {
break;
}
}
// PriorityQueue can be uninitialized here if only an empty batch is received between 2 EMIT outcomes.
if (schema == null || (priorityQueue == null || !priorityQueue.isInitialized())) {
// builder may be null at this point if the first incoming batch is empty
return handleEmptyBatches(lastKnownOutcome);
}
priorityQueue.generate();
prepareOutputContainer(priorityQueue.getHyperBatch(), priorityQueue.getFinalSv4());
// The final outcome returned to the caller depends on lastKnownOutcome.
return getFinalOutcome();
}
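One detail worth noting in the OK/EMIT branch above is the success flag: the RecordBatchData is cleared in the finally block only when the body did not complete, so its buffers are released on failure but kept on the happy path once the queue owns them. A small generic sketch of that cleanup idiom; Resource and its clear() method are hypothetical stand-ins:

public class CleanupOnFailureSketch {

  static class Resource {
    void clear() {
      // release buffers held by this resource
    }
  }

  static void consume(Resource resource) {
    boolean success = false;
    try {
      // ... work that may throw, e.g. priorityQueue.add(batch) in the method above ...
      success = true;
    } finally {
      if (!success) {
        // release only on failure; on success the buffers now belong to the consumer
        resource.clear();
      }
    }
  }

  public static void main(String[] args) {
    consume(new Resource());
  }
}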
use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.
the class TopNBatch method purge.
/**
* As the PriorityQueue is built up, it stores the indexes of the limit number of
* records which fall under the TopN category (in heapSv4). But it also
* stores all the incoming RecordBatches, with all their records, inside a
* HyperContainer (hyperBatch). When a certain threshold of batches is
* reached, this method is called: it copies the limit number of records
* whose indexes are stored in heapSv4 out of the HyperBatch into a new
* VectorContainer and releases all other records and their batches. The
* new VectorContainer is then stored inside the HyperBatch and its
* corresponding indexes are stored in the heapSv4 vector. This is done to
* avoid holding on to lots of RecordBatches, which can create an OutOfMemory
* condition.
*/
private void purge() {
Stopwatch watch = Stopwatch.createStarted();
VectorContainer c = priorityQueue.getHyperBatch();
// Simple VectorContainer which stores only the limit number of records. Only the records whose indexes are stored inside
// selectionVector4 below are copied from the Hyper container into this simple container.
VectorContainer newContainer = new VectorContainer(oContext);
// SV4 storing the limit number of indexes
SelectionVector4 selectionVector4 = priorityQueue.getSv4();
SimpleSV4RecordBatch batch = new SimpleSV4RecordBatch(c, selectionVector4, context);
if (copier == null) {
copier = GenericCopierFactory.createAndSetupCopier(batch, newContainer, null);
} else {
for (VectorWrapper<?> i : batch) {
ValueVector v = TypeHelper.getNewVector(i.getField(), oContext.getAllocator());
newContainer.add(v);
}
copier.setup(batch, newContainer);
}
SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
try {
// Purge all the existing batches to a new batch which only holds the selected records
copyToPurge(newContainer, builder);
// New VectorContainer that contains only limit number of records and is later passed to resetQueue to create a
// HyperContainer backing the priority queue out of it
VectorContainer newQueue = new VectorContainer();
builder.build(newQueue);
try {
priorityQueue.resetQueue(newQueue, builder.getSv4().createNewWrapperCurrent());
} catch (SchemaChangeException e) {
throw schemaChangeException(e, logger);
}
builder.getSv4().clear();
} finally {
DrillAutoCloseables.closeNoChecked(builder);
}
logger.debug("Took {} us to purge", watch.elapsed(TimeUnit.MICROSECONDS));
}
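Conceptually, purge() copies only the rows referenced by the heap's SV4 into a fresh container and releases everything else, which is what keeps memory bounded. A plain-Java analogue of that idea, with List<String> standing in for the hyper batch and a list of indexes standing in for the SV4; this is purely illustrative, none of it is Drill API:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PurgeSketch {

  public static void main(String[] args) {
    // stands in for the hyper batch holding every record seen so far
    List<String> allRows = new ArrayList<>(Arrays.asList("a", "b", "c", "d", "e"));
    // stands in for heapSv4: indexes of the rows that currently belong to the top N
    List<Integer> topIndexes = Arrays.asList(4, 1);

    // copy only the selected rows into a new, small container
    List<String> compacted = new ArrayList<>();
    for (int index : topIndexes) {
      compacted.add(allRows.get(index));
    }

    // release the old container; only the compacted rows remain, so memory stays bounded
    allRows.clear();
    System.out.println(compacted);  // prints [e, b]
  }
}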