
Example 46 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class RuntimeFilterSink method aggregate.

private void aggregate(RuntimeFilterWritable srcRuntimeFilterWritable) {
    BitData.RuntimeFilterBDef runtimeFilterB = srcRuntimeFilterWritable.getRuntimeFilterBDef();
    int joinMajorId = runtimeFilterB.getMajorFragmentId();
    int buildSideRfNumber;
    RuntimeFilterWritable toAggregated = null;
    buildSideRfNumber = joinMjId2rfNumber.get(joinMajorId);
    buildSideRfNumber--;
    joinMjId2rfNumber.put(joinMajorId, buildSideRfNumber);
    toAggregated = joinMjId2AggregatedRF.get(joinMajorId);
    if (toAggregated == null) {
        toAggregated = srcRuntimeFilterWritable;
        toAggregated.retainBuffers(1);
    } else {
        toAggregated.aggregate(srcRuntimeFilterWritable);
    }
    joinMjId2AggregatedRF.put(joinMajorId, toAggregated);
    if (buildSideRfNumber == 0) {
        joinMjId2AggregatedRF.remove(joinMajorId);
        route(toAggregated);
        joinMjId2rfNumber.remove(joinMajorId);
        Stopwatch stopwatch = joinMjId2Stopwatch.get(joinMajorId);
        logger.info("received all the RFWs belonging to the majorId {}'s HashJoin nodes and flushed aggregated RFW out elapsed {} ms", joinMajorId, stopwatch.elapsed(TimeUnit.MILLISECONDS));
    }
}
Also used: Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch), BitData (org.apache.drill.exec.proto.BitData)
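
The method above keeps one Stopwatch per HashJoin major fragment: the watch is presumably started elsewhere in RuntimeFilterSink when the first runtime filter for that fragment arrives, and it is read once the last expected filter has been aggregated and routed. Below is a minimal, self-contained sketch of that start-on-first, report-on-last idiom. The PerKeyTimer class and its expect()/arrived() methods are hypothetical illustrations, not Drill classes, and the sketch imports plain Guava (com.google.common.base.Stopwatch) rather than Drill's shaded copy; the API is the same.

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;

public class PerKeyTimer {

    // Remaining pieces expected per key, and the timer started when the key is registered.
    private final Map<Integer, Integer> pending = new HashMap<>();
    private final Map<Integer, Stopwatch> timers = new HashMap<>();

    // Declare how many pieces are expected for a key and start its timer.
    public synchronized void expect(int key, int pieces) {
        pending.put(key, pieces);
        timers.put(key, Stopwatch.createStarted());
    }

    // Record one arrival; when the last piece lands, report the elapsed time.
    public synchronized void arrived(int key) {
        int remaining = pending.get(key) - 1;
        if (remaining == 0) {
            pending.remove(key);
            Stopwatch watch = timers.remove(key);
            System.out.printf("key %d complete after %d ms%n", key, watch.elapsed(TimeUnit.MILLISECONDS));
        } else {
            pending.put(key, remaining);
        }
    }

    public static void main(String[] args) {
        PerKeyTimer timer = new PerKeyTimer();
        timer.expect(1, 3);
        timer.arrived(1);
        timer.arrived(1);
        timer.arrived(1);  // prints the elapsed time for key 1
    }
}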

Example 47 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class BufferedDirectBufInputStream method getNextBlock.

/**
 * Reads one more block from the underlying stream.
 * Assumes we have reached the end of buffered data.
 * Assumes it is being called from a synchronized block.
 * Returns the number of bytes now available in the buffer, or 0 if the underlying stream is at EOF.
 */
private int getNextBlock() throws IOException {
    Preconditions.checkState(this.curPosInBuffer >= this.count, "Internal error: Buffered stream has not been consumed and trying to read more from underlying stream");
    checkInputStreamState();
    DrillBuf buffer = getBuf();
    buffer.clear();
    this.count = this.curPosInBuffer = 0;
    if (logger.isTraceEnabled()) {
        logger.trace("PERF: Disk read start. {}, StartOffset: {}, TotalByteSize: {}, BufferSize: {}, Count: {}, " + "CurPosInStream: {}, CurPosInBuffer: {}", this.streamId, this.startOffset, this.totalByteSize, this.bufSize, this.count, this.curPosInStream, this.curPosInBuffer);
    }
    Stopwatch timer = Stopwatch.createStarted();
    int bytesToRead = 0;
    // If we are enforcing the total byte size, read no more than the bytes remaining in the
    // declared range; otherwise, once the buffer capacity exceeds the remaining bytes, we avoid
    // reading too much data by reducing the size of the buffer, down to 64KiB (SMALL_BUFFER_SIZE).
    if (enforceTotalByteSize) {
        bytesToRead = (buffer.capacity() >= (totalByteSize + startOffset - curPosInStream)) ? (int) (totalByteSize + startOffset - curPosInStream) : buffer.capacity();
    } else {
        if (buffer.capacity() >= (totalByteSize + startOffset - curPosInStream)) {
            if (buffer.capacity() > SMALL_BUFFER_SIZE) {
                buffer = this.reallocBuffer(SMALL_BUFFER_SIZE);
            }
        }
        bytesToRead = buffer.capacity();
    }
    ByteBuffer directBuffer = buffer.nioBuffer(curPosInBuffer, bytesToRead);
    // The DFS can return *more* bytes than requested if the capacity of the buffer is greater.
    // i.e. 'n' can be greater than the bytes requested, which is pretty stupid and violates
    // the API contract; but we still have to deal with it. So we make sure the size of the
    // buffer is exactly the same as the number of bytes requested.
    int bytesRead = -1;
    int nBytes = 0;
    if (bytesToRead > 0) {
        try {
            nBytes = HadoopStreams.wrap(getInputStream()).read(directBuffer);
        } catch (Exception e) {
            logger.error("Error reading from stream {}. Error was : {}", this.streamId, e.getMessage());
            throw new IOException(e);
        }
        if (nBytes > 0) {
            buffer.writerIndex(nBytes);
            this.count = nBytes + this.curPosInBuffer;
            this.curPosInStream = getInputStream().getPos();
            bytesRead = nBytes;
            if (logger.isTraceEnabled()) {
                logger.trace("PERF: Disk read complete. {}, StartOffset: {}, TotalByteSize: {}, BufferSize: {}, BytesRead: {}, Count: {}, " + "CurPosInStream: {}, CurPosInBuffer: {}, Time: {} ms", this.streamId, this.startOffset, this.totalByteSize, this.bufSize, bytesRead, this.count, this.curPosInStream, this.curPosInBuffer, ((double) timer.elapsed(TimeUnit.MICROSECONDS)) / 1000);
            }
        }
    }
    return this.count - this.curPosInBuffer;
}
Also used: Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch), IOException (java.io.IOException), ByteBuffer (java.nio.ByteBuffer), DrillBuf (io.netty.buffer.DrillBuf)
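
getNextBlock() wraps a single blocking read in a Stopwatch and, at TRACE level, reports microseconds converted to fractional milliseconds. The following is a hedged sketch of just that measurement idiom; it reads from a local FileChannel instead of Drill's HadoopStreams wrapper so it stands alone, and the TimedRead class and readBlock() method are illustrative names only.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;

public class TimedRead {

    // Read one block into the buffer and report how long the read took, in fractional ms.
    static int readBlock(FileChannel channel, ByteBuffer buffer) throws IOException {
        Stopwatch timer = Stopwatch.createStarted();
        int bytesRead = channel.read(buffer);
        double millis = timer.elapsed(TimeUnit.MICROSECONDS) / 1000.0;
        System.out.printf("Disk read complete. BytesRead: %d, Time: %.3f ms%n", bytesRead, millis);
        return bytesRead;
    }

    public static void main(String[] args) throws IOException {
        try (FileChannel channel = FileChannel.open(Path.of(args[0]), StandardOpenOption.READ)) {
            readBlock(channel, ByteBuffer.allocateDirect(64 * 1024));  // 64 KiB, matching the small-buffer size discussed above
        }
    }
}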

Example 48 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class RunRootExec method main.

public static void main(String[] args) throws Exception {
    String path = args[0];
    int iterations = Integer.parseInt(args[1]);
    // 'c' is the DrillConfig held by the enclosing class (not shown in this excerpt)
    Drillbit bit = new Drillbit(c, RemoteServiceSet.getLocalServiceSet(), ClassPathScanner.fromPrescan(c));
    bit.run();
    DrillbitContext bitContext = bit.getContext();
    PhysicalPlanReader reader = bitContext.getPlanReader();
    PhysicalPlan plan = reader.readPhysicalPlan(Files.asCharSource(new File(path), Charsets.UTF_8).read());
    FunctionImplementationRegistry registry = bitContext.getFunctionImplementationRegistry();
    FragmentContextImpl context = new FragmentContextImpl(bitContext, PlanFragment.getDefaultInstance(), null, registry);
    SimpleRootExec exec;
    for (int i = 0; i < iterations; i++) {
        Stopwatch w = Stopwatch.createStarted();
        logger.info("STARTITER: {}", i);
        exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));
        while (exec.next()) {
            for (ValueVector v : exec) {
                v.clear();
            }
        }
        logger.info("ENDITER: {}", i);
        logger.info("TIME: {}ms", w.elapsed(TimeUnit.MILLISECONDS));
        exec.close();
    }
    context.close();
    bit.close();
}
Also used: DrillbitContext (org.apache.drill.exec.server.DrillbitContext), ValueVector (org.apache.drill.exec.vector.ValueVector), SimpleRootExec (org.apache.drill.exec.physical.impl.SimpleRootExec), PhysicalPlan (org.apache.drill.exec.physical.PhysicalPlan), Drillbit (org.apache.drill.exec.server.Drillbit), PhysicalPlanReader (org.apache.drill.exec.planner.PhysicalPlanReader), Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch), FragmentContextImpl (org.apache.drill.exec.ops.FragmentContextImpl), File (java.io.File), FunctionImplementationRegistry (org.apache.drill.exec.expr.fn.FunctionImplementationRegistry)
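
RunRootExec times each pass of its benchmark loop with a fresh Stopwatch so per-iteration numbers can be compared directly. Here is a minimal sketch of that loop shape; doWork() is a hypothetical placeholder for executing the physical plan, not a Drill API.

import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;

public class IterationTimer {

    public static void main(String[] args) {
        int iterations = Integer.parseInt(args[0]);
        for (int i = 0; i < iterations; i++) {
            Stopwatch w = Stopwatch.createStarted();  // fresh timer per iteration
            doWork();
            System.out.printf("ITER %d TIME: %dms%n", i, w.elapsed(TimeUnit.MILLISECONDS));
        }
    }

    // Placeholder for the work being measured (executing the plan in the real class).
    private static void doWork() {
        try {
            Thread.sleep(10);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}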

Example 49 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class MaprDBJsonRecordReader method next.

@Override
public int next() {
    Stopwatch watch = Stopwatch.createUnstarted();
    watch.start();
    vectorWriter.allocate();
    vectorWriter.reset();
    int recordCount = 0;
    reader = null;
    document = null;
    int maxRecordsForThisBatch = this.maxRecordsToRead >= 0 ? Math.min(BaseValueVector.INITIAL_VALUE_ALLOCATION, this.maxRecordsToRead) : BaseValueVector.INITIAL_VALUE_ALLOCATION;
    try {
        // If the last document caused a SchemaChange, create a new output schema for this scan batch
        if (schemaState == SchemaState.SCHEMA_CHANGE && !ignoreSchemaChange) {
            // Clear the ScanBatch vector container writer/mutator in order to be able to generate the new schema
            vectorWriterMutator.clear();
            vectorWriter = new VectorContainerWriter(vectorWriterMutator, unionEnabled);
            logger.debug("Encountered schema change earlier use new writer {}", vectorWriter.toString());
            document = lastDocument;
            setupWriter();
            if (recordCount < maxRecordsForThisBatch) {
                vectorWriter.setPosition(recordCount);
                if (document != null) {
                    reader = (DBDocumentReaderBase) document.asReader();
                    documentWriter.writeDBDocument(vectorWriter, reader);
                    recordCount++;
                }
            }
        }
    } catch (SchemaChangeException e) {
        String err_row = reader.getId().asJsonString();
        if (ignoreSchemaChange) {
            logger.warn("{}. Dropping row '{}' from result.", e.getMessage(), err_row);
            logger.debug("Stack trace:", e);
        } else {
            /* We should not encounter a SchemaChangeException here since this is the first document for this
             * new schema. Something is very wrong and we cannot proceed any further.
             */
            throw dataReadError(logger, e, "SchemaChangeException for row '%s'.", err_row);
        }
    }
    schemaState = SchemaState.SCHEMA_INIT;
    while (recordCount < maxRecordsForThisBatch) {
        vectorWriter.setPosition(recordCount);
        try {
            document = nextDocument();
            if (document == null) {
                // no more documents for this reader
                break;
            } else {
                documentWriter.writeDBDocument(vectorWriter, (DBDocumentReaderBase) document.asReader());
            }
            recordCount++;
        } catch (UserException e) {
            throw UserException.unsupportedError(e).addContext(String.format("Table: %s, document id: '%s'", table.getPath(), document.asReader() == null ? null : IdCodec.asString(((DBDocumentReaderBase) document.asReader()).getId()))).build(logger);
        } catch (SchemaChangeException e) {
            String err_row = ((DBDocumentReaderBase) document.asReader()).getId().asJsonString();
            if (ignoreSchemaChange) {
                logger.warn("{}. Dropping row '{}' from result.", e.getMessage(), err_row);
                logger.debug("Stack trace:", e);
            } else {
                /* Save the current document reader for next iteration. The recordCount is not updated so we
                 * would start from this reader on the next next() call.
                 */
                lastDocument = document;
                schemaState = SchemaState.SCHEMA_CHANGE;
                break;
            }
        }
    }
    if (nonExistentColumnsProjection && recordCount > 0) {
        if (schema == null || schema.isEmpty()) {
            JsonReaderUtils.ensureAtLeastOneField(vectorWriter, getColumns(), allTextMode, Collections.emptyList());
        } else {
            JsonReaderUtils.writeColumnsUsingSchema(vectorWriter, getColumns(), schema, allTextMode);
        }
    }
    vectorWriter.setValueCount(recordCount);
    if (maxRecordsToRead > 0) {
        maxRecordsToRead -= recordCount;
    }
    logger.debug("Took {} ms to get {} records", watch.elapsed(TimeUnit.MILLISECONDS), recordCount);
    return recordCount;
}
Also used: SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), VectorContainerWriter (org.apache.drill.exec.vector.complex.impl.VectorContainerWriter), DBDocumentReaderBase (com.mapr.db.ojai.DBDocumentReaderBase), Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch), UserException (org.apache.drill.common.exceptions.UserException)
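
The next() method above creates its Stopwatch unstarted, starts it explicitly, and logs the elapsed time once per batch rather than per record, which keeps the timing overhead negligible. Below is a small sketch of that batch-level timing shape; the Iterator source, the sink list, and the 4096 batch cap are illustrative stand-ins for the OJAI document stream, the vector writer, and the reader's batch limit.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;

public class BatchTimer {

    private static final int MAX_BATCH_SIZE = 4096;  // illustrative cap, analogous to the reader's batch limit

    // Copy up to MAX_BATCH_SIZE items from the source and report how long the whole batch took.
    static int nextBatch(Iterator<String> source, List<String> sink) {
        Stopwatch watch = Stopwatch.createUnstarted();
        watch.start();
        int recordCount = 0;
        while (recordCount < MAX_BATCH_SIZE && source.hasNext()) {
            sink.add(source.next());
            recordCount++;
        }
        System.out.printf("Took %d ms to get %d records%n", watch.elapsed(TimeUnit.MILLISECONDS), recordCount);
        return recordCount;
    }

    public static void main(String[] args) {
        Iterator<String> source = List.of("a", "b", "c").iterator();
        System.out.println(nextBatch(source, new ArrayList<>()));  // prints 3
    }
}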

Example 50 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class RestrictedJsonRecordReader method next.

@Override
public int next() {
    Stopwatch watch = Stopwatch.createUnstarted();
    watch.start();
    RestrictedMapRDBSubScanSpec rss = ((RestrictedMapRDBSubScanSpec) this.subScanSpec);
    vectorWriter.allocate();
    vectorWriter.reset();
    if (!rss.readyToGetRowKey()) {
        // when we are in the build schema phase
        if (rss.isBuildSchemaPhase()) {
            readToInitSchema();
        }
        return 0;
    }
    Table table = super.formatPlugin.getJsonTableCache().getTable(subScanSpec.getTableName(), subScanSpec.getUserName());
    final MultiGet multiGet = new MultiGet((BaseJsonTable) table, condition, false, projections);
    int recordCount = 0;
    DBDocumentReaderBase reader = null;
    int maxRecordsForThisBatch = this.maxRecordsToRead > 0 ? Math.min(rss.getMaxRowKeysToBeRead(), this.maxRecordsToRead) : this.maxRecordsToRead == -1 ? rss.getMaxRowKeysToBeRead() : 0;
    Stopwatch timer = Stopwatch.createUnstarted();
    while (recordCount < maxRecordsForThisBatch) {
        ByteBuffer[] rowKeyIds = rss.getRowKeyIdsToRead(batchSize);
        if (rowKeyIds == null) {
            break;
        }
        try {
            timer.start();
            final List<Document> docList = multiGet.doGet(rowKeyIds);
            int index = 0;
            long docsToRead = docList.size();
            // If limit pushdown then stop once we have `limit` rows from multiget i.e. maxRecordsForThisBatch
            if (this.maxRecordsToRead != -1) {
                docsToRead = Math.min(docsToRead, maxRecordsForThisBatch);
            }
            while (index < docsToRead) {
                vectorWriter.setPosition(recordCount);
                reader = (DBDocumentReaderBase) docList.get(index).asReader();
                documentWriter.writeDBDocument(vectorWriter, reader);
                recordCount++;
                index++;
            }
            timer.stop();
        } catch (UserException e) {
            throw UserException.unsupportedError(e).addContext(String.format("Table: %s, document id: '%s'", getTable().getPath(), reader == null ? null : IdCodec.asString(reader.getId()))).build(logger);
        } catch (SchemaChangeException e) {
            if (getIgnoreSchemaChange()) {
                logger.warn("{}. Dropping the row from result.", e.getMessage());
                logger.debug("Stack trace:", e);
            } else {
                throw dataReadError(logger, e);
            }
        }
    }
    vectorWriter.setValueCount(recordCount);
    if (maxRecordsToRead > 0) {
        if (maxRecordsToRead - recordCount >= 0) {
            maxRecordsToRead -= recordCount;
        } else {
            maxRecordsToRead = 0;
        }
    }
    logger.debug("Took {} ms to get {} records, getrowkey {}", watch.elapsed(TimeUnit.MILLISECONDS), recordCount, timer.elapsed(TimeUnit.MILLISECONDS));
    return recordCount;
}
Also used: BaseJsonTable (com.mapr.db.impl.BaseJsonTable), Table (com.mapr.db.Table), RestrictedMapRDBSubScanSpec (org.apache.drill.exec.store.mapr.db.RestrictedMapRDBSubScanSpec), Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch), Document (org.ojai.Document), ByteBuffer (java.nio.ByteBuffer), MultiGet (com.mapr.db.impl.MultiGet), SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), DBDocumentReaderBase (com.mapr.db.ojai.DBDocumentReaderBase), UserException (org.apache.drill.common.exceptions.UserException)
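
This last example runs two Stopwatches with different scopes: watch covers the entire next() call, while timer is started and stopped around only the MultiGet lookups, so the final debug line separates total batch time from row-key fetch time. A hedged sketch of that nested-timer idiom follows; fetch() and process() are hypothetical placeholders for MultiGet.doGet() and the document-writing loop.

import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;

public class NestedTimers {

    // Time the whole loop with one watch and only the fetch phase with another.
    static void processBatches(int batches) {
        Stopwatch total = Stopwatch.createStarted();
        Stopwatch fetchOnly = Stopwatch.createUnstarted();
        for (int i = 0; i < batches; i++) {
            fetchOnly.start();
            byte[] rows = fetch();   // the expensive sub-step we want to isolate
            fetchOnly.stop();        // elapsed time accumulates across start/stop cycles
            process(rows);           // included in total, excluded from fetchOnly
        }
        System.out.printf("total %d ms, fetch-only %d ms%n",
                total.elapsed(TimeUnit.MILLISECONDS), fetchOnly.elapsed(TimeUnit.MILLISECONDS));
    }

    private static byte[] fetch() { return new byte[1024]; }      // placeholder for MultiGet.doGet()
    private static void process(byte[] rows) { /* placeholder for writing documents */ }

    public static void main(String[] args) {
        processBatches(5);
    }
}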

Aggregations

Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch): 68
IOException (java.io.IOException): 13
Path (org.apache.hadoop.fs.Path): 12
ArrayList (java.util.ArrayList): 8
DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException): 8
FileStatus (org.apache.hadoop.fs.FileStatus): 8
DrillBuf (io.netty.buffer.DrillBuf): 7
ByteBuffer (java.nio.ByteBuffer): 7
SchemaPath (org.apache.drill.common.expression.SchemaPath): 7
HashMap (java.util.HashMap): 5
RelNode (org.apache.calcite.rel.RelNode): 5
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 4
DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint): 4
VectorContainer (org.apache.drill.exec.record.VectorContainer): 4
SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4): 4
ValueVector (org.apache.drill.exec.vector.ValueVector): 4
CompressionCodecName (org.apache.parquet.hadoop.metadata.CompressionCodecName): 4
File (java.io.File): 3
ResultSet (java.sql.ResultSet): 3
ResultSetMetaData (java.sql.ResultSetMetaData): 3