Example 41 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class AsyncPageReader method decompressPageV1.

/**
 * Reads a compressed v1 data page or a dictionary page, both of which are compressed
 * in their entirety.
 * @return decompressed Parquet page data
 * @throws IOException if the page data cannot be read or decompressed
 */
protected DrillBuf decompressPageV1(ReadStatus readStatus) throws IOException {
    Stopwatch timer = Stopwatch.createUnstarted();
    PageHeader pageHeader = readStatus.getPageHeader();
    int inputSize = pageHeader.getCompressed_page_size();
    int outputSize = pageHeader.getUncompressed_page_size();
    // TODO: does reporting this number have the same meaning in an async context?
    long start = dataReader.getPos();
    long timeToRead;
    DrillBuf inputPageData = readStatus.getPageData();
    DrillBuf outputPageData = this.allocator.buffer(outputSize);
    try {
        timer.start();
        CompressionCodecName codecName = columnChunkMetaData.getCodec();
        CompressionCodecFactory.BytesInputDecompressor decomp = codecFactory.getDecompressor(codecName);
        ByteBuffer input = inputPageData.nioBuffer(0, inputSize);
        ByteBuffer output = outputPageData.nioBuffer(0, outputSize);
        decomp.decompress(input, inputSize, output, outputSize);
        outputPageData.writerIndex(outputSize);
        timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);
        if (logger.isTraceEnabled()) {
            logger.trace("Col: {}  readPos: {}  Uncompressed_size: {}  pageData: {}", columnChunkMetaData.toString(), // TODO: see comment on earlier call to getPos()
            dataReader.getPos(), outputSize, ByteBufUtil.hexDump(outputPageData));
        }
        this.updateStats(pageHeader, "Decompress", start, timeToRead, inputSize, outputSize);
    } finally {
        readStatus.setPageData(null);
        if (inputPageData != null) {
            inputPageData.release();
        }
    }
    return outputPageData;
}
Also used : CompressionCodecFactory(org.apache.parquet.compression.CompressionCodecFactory) PageHeader(org.apache.parquet.format.PageHeader) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) ByteBuffer(java.nio.ByteBuffer) DrillBuf(io.netty.buffer.DrillBuf)
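
The detail worth noting here is the deferred start: the Stopwatch is created unstarted so that only the decompression call itself is timed, not the buffer setup around it. Below is a minimal, standalone sketch of that pattern. It uses the unshaded com.google.common.base.Stopwatch (Drill relocates the same class under org.apache.drill.shaded.guava), and the DecompressTiming class and decompress method are hypothetical stand-ins for the codec's decompress call.

import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;

public class DecompressTiming {
    public static void main(String[] args) {
        // Created unstarted so that only the measured call is timed,
        // mirroring decompressPageV1 above.
        Stopwatch timer = Stopwatch.createUnstarted();

        byte[] input = new byte[1024];

        timer.start();
        long checksum = decompress(input); // hypothetical stand-in for decomp.decompress(...)
        long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);

        System.out.println("Decompress took " + timeToRead + " ns (checksum " + checksum + ")");
    }

    // Hypothetical stand-in for the codec's decompress call.
    private static long decompress(byte[] input) {
        long sum = 0;
        for (byte b : input) {
            sum += b;
        }
        return sum;
    }
}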

Example 42 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class ParquetRecordReader method next.

/**
 * Read the next record batch from the file using the reader and read state
 * created previously.
 */
@Override
public int next() {
    readState.resetBatch();
    Stopwatch timer = Stopwatch.createStarted();
    try {
        return batchReader.readBatch();
    } catch (Exception e) {
        throw handleAndRaise("\nHadoop path: " + hadoopPath.toUri().getPath() +
            "\nTotal records read: " + readState.recordsRead() +
            "\nRow group index: " + rowGroupIndex +
            "\nRecords to read: " + numRecordsToRead, e);
    } finally {
        parquetReaderStats.timeProcess.addAndGet(timer.elapsed(TimeUnit.NANOSECONDS));
    }
}
Also used : Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException)
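
Here the Stopwatch is created already running and the elapsed time is charged in a finally block, so the batch is counted against the stat even when readBatch() throws. A minimal, standalone sketch of that accumulate-in-finally pattern follows; the BatchTiming class, the timeProcess counter, and the readBatch stand-in are hypothetical, and the unshaded Guava Stopwatch is assumed.

import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import com.google.common.base.Stopwatch;

public class BatchTiming {
    // Mirrors parquetReaderStats.timeProcess: a thread-safe running total in nanoseconds.
    private static final AtomicLong timeProcess = new AtomicLong();

    // Hypothetical stand-in for batchReader.readBatch().
    private static int readBatch() {
        return 4096;
    }

    static int next() {
        Stopwatch timer = Stopwatch.createStarted();
        try {
            return readBatch();
        } finally {
            // The finally block charges the elapsed time even if readBatch() throws.
            timeProcess.addAndGet(timer.elapsed(TimeUnit.NANOSECONDS));
        }
    }

    public static void main(String[] args) {
        next();
        System.out.println("Total time in next(): " + timeProcess.get() + " ns");
    }
}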

Example 43 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class VarLenBinaryReader method readFields.

/**
 * Reads as many variable length values as possible.
 *
 * @param recordsToReadInThisPass - the number of records recommended for reading from the reader
 * @return - the number of records actually read in this pass
 */
public long readFields(long recordsToReadInThisPass) throws IOException {
    // reset the column readers; this also writes the first 0 offset
    for (VarLengthColumn<?> columnReader : columns) {
        columnReader.reset();
    }
    Stopwatch timer = Stopwatch.createStarted();
    // Ensure we do not read more than batch record count
    recordsToReadInThisPass = Math.min(recordsToReadInThisPass, batchSizer.getCurrentRecordsPerBatch());
    long recordsReadInCurrentPass = 0;
    if (!useBulkReader) {
        recordsReadInCurrentPass = determineSizesSerial(recordsToReadInThisPass);
        if (useAsyncTasks) {
            readRecordsParallel(recordsReadInCurrentPass);
        } else {
            readRecordsSerial(recordsReadInCurrentPass);
        }
    } else {
        recordsReadInCurrentPass = readRecordsInBulk((int) recordsToReadInThisPass);
    }
    // Publish this information
    parentReader.getReadState().setValuesReadInCurrentPass((int) recordsReadInCurrentPass);
    // Update the stats
    parentReader.parquetReaderStats.timeVarColumnRead.addAndGet(timer.elapsed(TimeUnit.NANOSECONDS));
    return recordsReadInCurrentPass;
}
Also used : Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch)
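
Example 43 wraps the serial, parallel, and bulk read paths in the same Stopwatch, so timeVarColumnRead reflects wall-clock time regardless of which branch ran. Below is a minimal, standalone sketch of timing across a serial/parallel toggle; VarLenTiming, readColumn, and the four-thread pool are hypothetical stand-ins, and the unshaded Guava Stopwatch is assumed.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;

public class VarLenTiming {
    public static void main(String[] args) throws Exception {
        boolean useAsyncTasks = true; // hypothetical toggle, like useAsyncTasks above
        Stopwatch timer = Stopwatch.createStarted();

        if (useAsyncTasks) {
            // Parallel branch: fan the per-column work out to a pool and wait.
            ExecutorService pool = Executors.newFixedThreadPool(4);
            List<Callable<Long>> tasks = new ArrayList<>();
            for (int col = 0; col < 4; col++) {
                final int c = col;
                tasks.add(() -> readColumn(c));
            }
            pool.invokeAll(tasks);
            pool.shutdown();
        } else {
            // Serial branch: same work on the calling thread.
            for (int col = 0; col < 4; col++) {
                readColumn(col);
            }
        }

        // One reading covers either branch; the stat does not care which path ran.
        System.out.println("Var-len read took " + timer.elapsed(TimeUnit.NANOSECONDS) + " ns");
    }

    // Hypothetical stand-in for a per-column read.
    private static long readColumn(int col) {
        long sum = 0;
        for (int i = 0; i < 100_000; i++) {
            sum += (long) i * col;
        }
        return sum;
    }
}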

Example 44 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class BatchReader method readAllFixedFields.

protected void readAllFixedFields(long recordsToRead) throws Exception {
    Stopwatch timer = Stopwatch.createStarted();
    if (readState.useAsyncColReader()) {
        readAllFixedFieldsParallel(recordsToRead);
    } else {
        readAllFixedFieldsSerial(recordsToRead);
    }
    readState.parquetReaderStats().timeFixedColumnRead.addAndGet(timer.elapsed(TimeUnit.NANOSECONDS));
}
Also used : Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch)

Example 45 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class BlockMapBuilder method buildEndpointMap.

/**
 * Builds a mapping of hostnames to Drillbit endpoints
 */
private static ImmutableMap<String, DrillbitEndpoint> buildEndpointMap(Collection<DrillbitEndpoint> endpoints) {
    Stopwatch watch = Stopwatch.createStarted();
    HashMap<String, DrillbitEndpoint> endpointMap = Maps.newHashMap();
    for (DrillbitEndpoint d : endpoints) {
        String hostName = d.getAddress();
        endpointMap.put(hostName, d);
    }
    watch.stop();
    logger.debug("Took {} ms to build endpoint map", watch.elapsed(TimeUnit.MILLISECONDS));
    return ImmutableMap.copyOf(endpointMap);
}
Also used : DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch)
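
Unlike the accumulating readers above, this is a one-shot measurement: the watch is stopped explicitly and read in milliseconds for a debug log. A minimal, standalone sketch of that stop-then-log pattern follows; MapBuildTiming and the String-to-Integer map are hypothetical stand-ins for the endpoint map, and the unshaded Guava Stopwatch is assumed.

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;

public class MapBuildTiming {
    public static void main(String[] args) {
        Stopwatch watch = Stopwatch.createStarted();

        // Hypothetical stand-in for the hostname-to-endpoint map built above.
        Map<String, Integer> endpointMap = new HashMap<>();
        for (int i = 0; i < 1000; i++) {
            endpointMap.put("host-" + i, i);
        }

        // stop() freezes the reading; elapsed(...) on a stopped watch returns the final value.
        watch.stop();
        System.out.println("Took " + watch.elapsed(TimeUnit.MILLISECONDS) + " ms to build endpoint map");
    }
}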

Aggregations

Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) 68
IOException (java.io.IOException) 13
Path (org.apache.hadoop.fs.Path) 12
ArrayList (java.util.ArrayList) 8
DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException) 8
FileStatus (org.apache.hadoop.fs.FileStatus) 8
DrillBuf (io.netty.buffer.DrillBuf) 7
ByteBuffer (java.nio.ByteBuffer) 7
SchemaPath (org.apache.drill.common.expression.SchemaPath) 7
HashMap (java.util.HashMap) 5
RelNode (org.apache.calcite.rel.RelNode) 5
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException) 4
DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) 4
VectorContainer (org.apache.drill.exec.record.VectorContainer) 4
SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4) 4
ValueVector (org.apache.drill.exec.vector.ValueVector) 4
CompressionCodecName (org.apache.parquet.hadoop.metadata.CompressionCodecName) 4
File (java.io.File) 3
ResultSet (java.sql.ResultSet) 3
ResultSetMetaData (java.sql.ResultSetMetaData) 3