use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.
the class RuntimeFilterSink method aggregate.
private void aggregate(RuntimeFilterWritable srcRuntimeFilterWritable) {
  BitData.RuntimeFilterBDef runtimeFilterB = srcRuntimeFilterWritable.getRuntimeFilterBDef();
  int joinMajorId = runtimeFilterB.getMajorFragmentId();
  int buildSideRfNumber;
  RuntimeFilterWritable toAggregated = null;
  // Track how many build-side runtime filters are still outstanding for this HashJoin major fragment.
  buildSideRfNumber = joinMjId2rfNumber.get(joinMajorId);
  buildSideRfNumber--;
  joinMjId2rfNumber.put(joinMajorId, buildSideRfNumber);
  // Fold the incoming filter into the partial aggregate for this major fragment.
  toAggregated = joinMjId2AggregatedRF.get(joinMajorId);
  if (toAggregated == null) {
    toAggregated = srcRuntimeFilterWritable;
    toAggregated.retainBuffers(1);
  } else {
    toAggregated.aggregate(srcRuntimeFilterWritable);
  }
  joinMjId2AggregatedRF.put(joinMajorId, toAggregated);
  // Once the last expected filter has arrived, route the aggregated filter and report the elapsed time.
  if (buildSideRfNumber == 0) {
    joinMjId2AggregatedRF.remove(joinMajorId);
    route(toAggregated);
    joinMjId2rfNumber.remove(joinMajorId);
    Stopwatch stopwatch = joinMjId2Stopwatch.get(joinMajorId);
    logger.info("Received all the RFWs belonging to the majorId {}'s HashJoin nodes and flushed the aggregated RFW out; elapsed {} ms",
        joinMajorId, stopwatch.elapsed(TimeUnit.MILLISECONDS));
  }
}
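The elapsed time logged above comes from a Stopwatch stored per HashJoin major fragment id in joinMjId2Stopwatch. A minimal standalone sketch of that per-key timing pattern, using hypothetical class and method names and the Drill-shaded Guava Stopwatch (plain Guava would import com.google.common.base.Stopwatch):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;

// Hypothetical sketch: start a stopwatch when the first filter for a join arrives,
// read it when the last expected filter has been aggregated.
public class PerJoinTimer {
  private final Map<Integer, Stopwatch> joinId2Stopwatch = new ConcurrentHashMap<>();

  // Called when the first runtime filter for this join shows up.
  public void onFirstFilter(int joinMajorId) {
    joinId2Stopwatch.computeIfAbsent(joinMajorId, id -> Stopwatch.createStarted());
  }

  // Called after the last expected filter has been aggregated and routed.
  public long onAllFiltersReceived(int joinMajorId) {
    Stopwatch stopwatch = joinId2Stopwatch.remove(joinMajorId);
    return stopwatch == null ? -1 : stopwatch.elapsed(TimeUnit.MILLISECONDS);
  }
}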
use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.
the class BufferedDirectBufInputStream method getNextBlock.
/**
 * Read one more block from the underlying stream.
 * Assumes we have reached the end of the buffered data.
 * Assumes it is being called from a synchronized block.
 *
 * @return the number of bytes read, or -1 if EOF
 */
private int getNextBlock() throws IOException {
  Preconditions.checkState(this.curPosInBuffer >= this.count,
      "Internal error: Buffered stream has not been consumed and trying to read more from underlying stream");
  checkInputStreamState();
  DrillBuf buffer = getBuf();
  buffer.clear();
  this.count = this.curPosInBuffer = 0;
  if (logger.isTraceEnabled()) {
    logger.trace("PERF: Disk read start. {}, StartOffset: {}, TotalByteSize: {}, BufferSize: {}, Count: {}, "
        + "CurPosInStream: {}, CurPosInBuffer: {}", this.streamId, this.startOffset, this.totalByteSize,
        this.bufSize, this.count, this.curPosInStream, this.curPosInBuffer);
  }
  Stopwatch timer = Stopwatch.createStarted();
  int bytesToRead = 0;
  // When the remaining bytes fit into the buffer, avoid holding on to more direct memory than
  // necessary: either cap the read at the remaining byte count (enforceTotalByteSize), or
  // shrink the buffer down to 64 KiB and keep reading from the underlying stream.
  if (enforceTotalByteSize) {
    bytesToRead = (buffer.capacity() >= (totalByteSize + startOffset - curPosInStream))
        ? (int) (totalByteSize + startOffset - curPosInStream)
        : buffer.capacity();
  } else {
    if (buffer.capacity() >= (totalByteSize + startOffset - curPosInStream)) {
      if (buffer.capacity() > SMALL_BUFFER_SIZE) {
        buffer = this.reallocBuffer(SMALL_BUFFER_SIZE);
      }
    }
    bytesToRead = buffer.capacity();
  }
  ByteBuffer directBuffer = buffer.nioBuffer(curPosInBuffer, bytesToRead);
  // The DFS can return *more* bytes than requested if the capacity of the buffer is greater,
  // i.e. 'n' can be greater than the bytes requested, which is pretty stupid and violates
  // the API contract; but we still have to deal with it. So we make sure the size of the
  // buffer is exactly the same as the number of bytes requested.
  int bytesRead = -1;
  int nBytes = 0;
  if (bytesToRead > 0) {
    try {
      nBytes = HadoopStreams.wrap(getInputStream()).read(directBuffer);
    } catch (Exception e) {
      logger.error("Error reading from stream {}. Error was : {}", this.streamId, e.getMessage());
      throw new IOException(e);
    }
    if (nBytes > 0) {
      buffer.writerIndex(nBytes);
      this.count = nBytes + this.curPosInBuffer;
      this.curPosInStream = getInputStream().getPos();
      bytesRead = nBytes;
      if (logger.isTraceEnabled()) {
        logger.trace("PERF: Disk read complete. {}, StartOffset: {}, TotalByteSize: {}, BufferSize: {}, BytesRead: {}, Count: {}, "
            + "CurPosInStream: {}, CurPosInBuffer: {}, Time: {} ms", this.streamId, this.startOffset, this.totalByteSize,
            this.bufSize, bytesRead, this.count, this.curPosInStream, this.curPosInBuffer,
            ((double) timer.elapsed(TimeUnit.MICROSECONDS)) / 1000);
      }
    }
  }
  return this.count - this.curPosInBuffer;
}
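The trace message above reports microsecond-resolution elapsed time as fractional milliseconds (elapsed(TimeUnit.MICROSECONDS) / 1000). A minimal sketch of that pattern, assuming an SLF4J logger and a generic byte channel rather than Drill's HadoopStreams wrapper:

import java.util.concurrent.TimeUnit;
import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Hypothetical sketch: time a read and log the duration with sub-millisecond precision.
public class TimedRead {
  private static final Logger logger = LoggerFactory.getLogger(TimedRead.class);

  static int readWithTiming(java.nio.ByteBuffer target, java.nio.channels.ReadableByteChannel channel)
      throws java.io.IOException {
    Stopwatch timer = Stopwatch.createStarted();
    int nBytes = channel.read(target);
    if (logger.isTraceEnabled()) {
      // elapsed(MICROSECONDS) / 1000.0 gives fractional milliseconds, as in the snippet above.
      logger.trace("Read {} bytes in {} ms", nBytes, timer.elapsed(TimeUnit.MICROSECONDS) / 1000.0);
    }
    return nBytes;
  }
}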
use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.
the class RunRootExec method main.
public static void main(String[] args) throws Exception {
  String path = args[0];
  int iterations = Integer.parseInt(args[1]);
  // 'c' (a DrillConfig) and 'logger' are fields of RunRootExec, not shown in this excerpt.
  Drillbit bit = new Drillbit(c, RemoteServiceSet.getLocalServiceSet(), ClassPathScanner.fromPrescan(c));
  bit.run();
  DrillbitContext bitContext = bit.getContext();
  PhysicalPlanReader reader = bitContext.getPlanReader();
  PhysicalPlan plan = reader.readPhysicalPlan(Files.asCharSource(new File(path), Charsets.UTF_8).read());
  FunctionImplementationRegistry registry = bitContext.getFunctionImplementationRegistry();
  FragmentContextImpl context = new FragmentContextImpl(bitContext, PlanFragment.getDefaultInstance(), null, registry);
  SimpleRootExec exec;
  for (int i = 0; i < iterations; i++) {
    Stopwatch w = Stopwatch.createStarted();
    logger.info("STARTITER: {}", i);
    exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));
    while (exec.next()) {
      for (ValueVector v : exec) {
        v.clear();
      }
    }
    logger.info("ENDITER: {}", i);
    logger.info("TIME: {}ms", w.elapsed(TimeUnit.MILLISECONDS));
    exec.close();
  }
  context.close();
  bit.close();
}
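RunRootExec starts a fresh Stopwatch at the top of each iteration, so every run of the plan is timed independently. A minimal sketch of that per-iteration benchmarking pattern, with a placeholder workload standing in for driving exec.next():

import java.util.concurrent.TimeUnit;
import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;

// Hypothetical sketch: time each iteration of a workload independently.
public class IterationTimer {
  public static void main(String[] args) {
    int iterations = args.length > 0 ? Integer.parseInt(args[0]) : 3;
    for (int i = 0; i < iterations; i++) {
      Stopwatch w = Stopwatch.createStarted();
      runOnce();  // stand-in for executing the physical plan
      System.out.println("ITER " + i + " took " + w.elapsed(TimeUnit.MILLISECONDS) + " ms");
    }
  }

  private static void runOnce() {
    // Placeholder workload; RunRootExec drives exec.next() here instead.
    try {
      Thread.sleep(10);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    }
  }
}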
use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.
the class MaprDBJsonRecordReader method next.
@Override
public int next() {
  Stopwatch watch = Stopwatch.createUnstarted();
  watch.start();
  vectorWriter.allocate();
  vectorWriter.reset();
  int recordCount = 0;
  reader = null;
  document = null;
  int maxRecordsForThisBatch = this.maxRecordsToRead >= 0
      ? Math.min(BaseValueVector.INITIAL_VALUE_ALLOCATION, this.maxRecordsToRead)
      : BaseValueVector.INITIAL_VALUE_ALLOCATION;
  try {
    // If the last document caused a SchemaChange, create a new output schema for this scan batch
    if (schemaState == SchemaState.SCHEMA_CHANGE && !ignoreSchemaChange) {
      // Clear the ScanBatch vector container writer/mutator in order to be able to generate the new schema
      vectorWriterMutator.clear();
      vectorWriter = new VectorContainerWriter(vectorWriterMutator, unionEnabled);
      logger.debug("Encountered schema change earlier; using new writer {}", vectorWriter.toString());
      document = lastDocument;
      setupWriter();
      if (recordCount < maxRecordsForThisBatch) {
        vectorWriter.setPosition(recordCount);
        if (document != null) {
          reader = (DBDocumentReaderBase) document.asReader();
          documentWriter.writeDBDocument(vectorWriter, reader);
          recordCount++;
        }
      }
    }
  } catch (SchemaChangeException e) {
    String err_row = reader.getId().asJsonString();
    if (ignoreSchemaChange) {
      logger.warn("{}. Dropping row '{}' from result.", e.getMessage(), err_row);
      logger.debug("Stack trace:", e);
    } else {
      /* We should not encounter a SchemaChangeException here since this is the first document for this
       * new schema. Something is very wrong - cannot handle any further!
       */
      throw dataReadError(logger, e, "SchemaChangeException for row '%s'.", err_row);
    }
  }
  schemaState = SchemaState.SCHEMA_INIT;
  while (recordCount < maxRecordsForThisBatch) {
    vectorWriter.setPosition(recordCount);
    try {
      document = nextDocument();
      if (document == null) {
        // no more documents for this reader
        break;
      } else {
        documentWriter.writeDBDocument(vectorWriter, (DBDocumentReaderBase) document.asReader());
      }
      recordCount++;
    } catch (UserException e) {
      throw UserException.unsupportedError(e)
          .addContext(String.format("Table: %s, document id: '%s'", table.getPath(),
              document.asReader() == null ? null : IdCodec.asString(((DBDocumentReaderBase) document.asReader()).getId())))
          .build(logger);
    } catch (SchemaChangeException e) {
      String err_row = ((DBDocumentReaderBase) document.asReader()).getId().asJsonString();
      if (ignoreSchemaChange) {
        logger.warn("{}. Dropping row '{}' from result.", e.getMessage(), err_row);
        logger.debug("Stack trace:", e);
      } else {
        /* Save the current document reader for the next iteration. The recordCount is not updated, so we
         * would start from this reader on the next next() call.
         */
        lastDocument = document;
        schemaState = SchemaState.SCHEMA_CHANGE;
        break;
      }
    }
  }
  if (nonExistentColumnsProjection && recordCount > 0) {
    if (schema == null || schema.isEmpty()) {
      JsonReaderUtils.ensureAtLeastOneField(vectorWriter, getColumns(), allTextMode, Collections.emptyList());
    } else {
      JsonReaderUtils.writeColumnsUsingSchema(vectorWriter, getColumns(), schema, allTextMode);
    }
  }
  vectorWriter.setValueCount(recordCount);
  if (maxRecordsToRead > 0) {
    maxRecordsToRead -= recordCount;
  }
  logger.debug("Took {} ms to get {} records", watch.elapsed(TimeUnit.MILLISECONDS), recordCount);
  return recordCount;
}
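The reader builds its batch timer with Stopwatch.createUnstarted() followed immediately by start(), which behaves the same as Stopwatch.createStarted(); createUnstarted() only matters when the watch should begin ticking later. A small illustrative sketch of the two forms:

import java.util.concurrent.TimeUnit;
import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;

// Hypothetical sketch: the two ways of obtaining a running Stopwatch.
public class StopwatchCreation {
  public static void main(String[] args) throws InterruptedException {
    // Equivalent to createStarted() when start() follows immediately.
    Stopwatch deferred = Stopwatch.createUnstarted();
    deferred.start();

    Stopwatch immediate = Stopwatch.createStarted();

    Thread.sleep(5);
    System.out.println("deferred:  " + deferred.elapsed(TimeUnit.MILLISECONDS) + " ms");
    System.out.println("immediate: " + immediate.elapsed(TimeUnit.MILLISECONDS) + " ms");
  }
}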
use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.
the class RestrictedJsonRecordReader method next.
@Override
public int next() {
  Stopwatch watch = Stopwatch.createUnstarted();
  watch.start();
  RestrictedMapRDBSubScanSpec rss = (RestrictedMapRDBSubScanSpec) this.subScanSpec;
  vectorWriter.allocate();
  vectorWriter.reset();
  if (!rss.readyToGetRowKey()) {
    // when we are in the build schema phase
    if (rss.isBuildSchemaPhase()) {
      readToInitSchema();
    }
    return 0;
  }
  Table table = super.formatPlugin.getJsonTableCache().getTable(subScanSpec.getTableName(), subScanSpec.getUserName());
  final MultiGet multiGet = new MultiGet((BaseJsonTable) table, condition, false, projections);
  int recordCount = 0;
  DBDocumentReaderBase reader = null;
  int maxRecordsForThisBatch = this.maxRecordsToRead > 0
      ? Math.min(rss.getMaxRowKeysToBeRead(), this.maxRecordsToRead)
      : this.maxRecordsToRead == -1 ? rss.getMaxRowKeysToBeRead() : 0;
  Stopwatch timer = Stopwatch.createUnstarted();
  while (recordCount < maxRecordsForThisBatch) {
    ByteBuffer[] rowKeyIds = rss.getRowKeyIdsToRead(batchSize);
    if (rowKeyIds == null) {
      break;
    }
    try {
      timer.start();
      final List<Document> docList = multiGet.doGet(rowKeyIds);
      int index = 0;
      long docsToRead = docList.size();
      // If limit pushdown, then stop once we have `limit` rows from the multiget, i.e. maxRecordsForThisBatch
      if (this.maxRecordsToRead != -1) {
        docsToRead = Math.min(docsToRead, maxRecordsForThisBatch);
      }
      while (index < docsToRead) {
        vectorWriter.setPosition(recordCount);
        reader = (DBDocumentReaderBase) docList.get(index).asReader();
        documentWriter.writeDBDocument(vectorWriter, reader);
        recordCount++;
        index++;
      }
      timer.stop();
    } catch (UserException e) {
      throw UserException.unsupportedError(e)
          .addContext(String.format("Table: %s, document id: '%s'", getTable().getPath(),
              reader == null ? null : IdCodec.asString(reader.getId())))
          .build(logger);
    } catch (SchemaChangeException e) {
      if (getIgnoreSchemaChange()) {
        logger.warn("{}. Dropping the row from result.", e.getMessage());
        logger.debug("Stack trace:", e);
      } else {
        throw dataReadError(logger, e);
      }
    }
  }
  vectorWriter.setValueCount(recordCount);
  if (maxRecordsToRead > 0) {
    if (maxRecordsToRead - recordCount >= 0) {
      maxRecordsToRead -= recordCount;
    } else {
      maxRecordsToRead = 0;
    }
  }
  logger.debug("Took {} ms to get {} records, getrowkey {}", watch.elapsed(TimeUnit.MILLISECONDS), recordCount, timer.elapsed(TimeUnit.MILLISECONDS));
  return recordCount;
}
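This reader keeps two stopwatches: watch covers the whole next() call, while timer is started and stopped around each multiGet.doGet() so its elapsed() value accumulates only the row-key fetch time across loop iterations. A minimal sketch of that accumulating pattern, with sleeps standing in for the fetch and write steps:

import java.util.concurrent.TimeUnit;
import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;

// Hypothetical sketch: one stopwatch for the whole batch, another accumulating only fetch time.
public class NestedTimers {
  public static void main(String[] args) throws InterruptedException {
    Stopwatch watch = Stopwatch.createStarted();    // total time for the "batch"
    Stopwatch timer = Stopwatch.createUnstarted();  // accumulates time spent fetching only

    for (int i = 0; i < 3; i++) {
      timer.start();
      Thread.sleep(5);   // stand-in for multiGet.doGet(rowKeyIds)
      timer.stop();      // elapsed() keeps accumulating across start/stop cycles

      Thread.sleep(2);   // stand-in for writing documents into the vector writer
    }

    System.out.println("total " + watch.elapsed(TimeUnit.MILLISECONDS) + " ms, fetch "
        + timer.elapsed(TimeUnit.MILLISECONDS) + " ms");
  }
}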