Search in sources:

Example 36 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class JdbcTestActionBase method testAction.

/**
 * Executes the given JDBC action against the shared test connection, renders the
 * column headers and every row as tab-separated text, logs the output, and
 * optionally verifies the number of rows returned.
 *
 * @param action   supplies the {@link ResultSet} to consume (run against {@code connection})
 * @param rowcount expected number of rows, or -1 to skip the row-count assertion
 * @throws Exception if executing the action or reading the result set fails
 */
protected void testAction(JdbcAction action, long rowcount) throws Exception {
    final StringBuilder sb = new StringBuilder();
    int rows = 0;
    Stopwatch watch = Stopwatch.createStarted();
    // try-with-resources: close the ResultSet (and its cursor) even when iteration fails.
    try (ResultSet r = action.getResult(connection)) {
        boolean first = true;
        while (r.next()) {
            rows++;
            ResultSetMetaData md = r.getMetaData();
            if (first) {
                // Header row: column names, emitted once before the first data row.
                for (int i = 1; i <= md.getColumnCount(); i++) {
                    sb.append(md.getColumnName(i));
                    sb.append('\t');
                }
                sb.append('\n');
                first = false;
            }
            for (int i = 1; i <= md.getColumnCount(); i++) {
                sb.append(r.getObject(i));
                sb.append('\t');
            }
            sb.append('\n');
        }
    }
    sb.append(String.format("Query completed in %d millis.\n", watch.elapsed(TimeUnit.MILLISECONDS)));
    if (rowcount != -1) {
        Assert.assertEquals(rowcount, (long) rows);
    }
    sb.append("\n\n\n");
    logger.info(sb.toString());
}
Also used : ResultSetMetaData(java.sql.ResultSetMetaData) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) ResultSet(java.sql.ResultSet)

Example 37 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class TestJdbcDistQuery method testQuery.

/**
 * Executes {@code sql} over a fresh connection, renders the column headers and every
 * row as tab-separated text, and logs the result together with the query wall time.
 * <p>On failure the method pauses briefly before propagating the exception, giving
 * in-flight distributed work a moment to settle (useful when debugging).
 *
 * @param sql the query to execute
 * @throws Exception if connecting, executing, or reading results fails
 */
private void testQuery(String sql) throws Exception {
    final StringBuilder sb = new StringBuilder();
    boolean success = false;
    try (Connection c = connect()) {
        // ???? TODO:  What is this currently redundant one-time loop for?  (If
        // it's kept around to make it easy to switch to looping multiple times
        // (e.g., for debugging) then define a constant field or local variable
        // for the number of iterations.)
        boolean first = true;
        for (int x = 0; x < 1; x++) {
            Stopwatch watch = Stopwatch.createStarted();
            // Close the Statement and ResultSet each iteration to avoid leaking cursors.
            try (Statement s = c.createStatement();
                 ResultSet r = s.executeQuery(sql)) {
                ResultSetMetaData md = r.getMetaData();
                if (first) {
                    // Header row: column names, emitted once across all iterations.
                    for (int i = 1; i <= md.getColumnCount(); i++) {
                        sb.append(md.getColumnName(i));
                        sb.append('\t');
                    }
                    sb.append('\n');
                    first = false;
                }
                while (r.next()) {
                    md = r.getMetaData();
                    for (int i = 1; i <= md.getColumnCount(); i++) {
                        sb.append(r.getObject(i));
                        sb.append('\t');
                    }
                    sb.append('\n');
                }
            }
            sb.append(String.format("Query completed in %d millis.\n", watch.elapsed(TimeUnit.MILLISECONDS)));
        }
        sb.append("\n\n\n");
        success = true;
    } finally {
        if (!success) {
            // Give asynchronous/distributed work a moment before surfacing the failure.
            Thread.sleep(2000);
        }
    }
    logger.info(sb.toString());
}
Also used : ResultSetMetaData(java.sql.ResultSetMetaData) Statement(java.sql.Statement) Connection(java.sql.Connection) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) ResultSet(java.sql.ResultSet)

Example 38 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class Metadata method tableModified.

/**
 * Check if the parquet metadata needs to be updated by comparing the modification time of the directories with
 * the modification time of the metadata file
 *
 * @param directories List of directories
 * @param metaFilePath path of parquet metadata cache file
 * @return true if metadata needs to be updated, false otherwise
 * @throws IOException if some resources are not accessible
 */
/**
 * Check if the parquet metadata needs to be updated by comparing the modification time of the directories with
 * the modification time of the metadata file.
 *
 * @param directories List of directories
 * @param metaFilePath path of parquet metadata cache file
 * @return true if metadata needs to be updated, false otherwise
 * @throws IOException if some resources are not accessible
 */
private boolean tableModified(List<Path> directories, Path metaFilePath, Path parentDir, MetadataContext metaContext, FileSystem fs) throws IOException {
    // Time the scan only when debug logging is enabled; null means "no timing".
    Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    metaContext.setStatus(parentDir);
    long cacheFileModifyTime = fs.getFileStatus(metaFilePath).getModificationTime();
    FileStatus status = fs.getFileStatus(parentDir);
    int checkedDirs = 1;
    // A newer parent directory (e.g. a child added or removed) invalidates the cache outright.
    if (status.getModificationTime() > cacheFileModifyTime) {
        return logAndStopTimer(true, status.getPath().toString(), timer, checkedDirs);
    }
    boolean modified = false;
    for (Path dir : directories) {
        checkedDirs++;
        metaContext.setStatus(dir);
        status = fs.getFileStatus(dir);
        if (status.getModificationTime() > cacheFileModifyTime) {
            modified = true;
            // Stop at the first stale directory; `status` now refers to it for logging.
            break;
        }
    }
    return logAndStopTimer(modified, status.getPath().toString(), timer, checkedDirs);
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) FileStatus(org.apache.hadoop.fs.FileStatus) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch)

Example 39 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class Metadata method getParquetTableMetadata.

/**
 * Get the parquet metadata for the parquet files in a directory.
 *
 * @param path the path of the directory
 * @return metadata object for an entire parquet directory structure
 * @throws IOException in case of problems during accessing files
 */
/**
 * Get the parquet metadata for the parquet files in a directory.
 *
 * @param path the path of the directory
 * @return metadata object for an entire parquet directory structure
 * @throws IOException in case of problems during accessing files
 */
private ParquetTableMetadata_v4 getParquetTableMetadata(Path path, FileSystem fs) throws IOException {
    FileStatus rootStatus = fs.getFileStatus(path);
    // Timing is collected only when debug logging is enabled.
    Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    List<FileStatus> statuses = new ArrayList<>();
    if (rootStatus.isFile()) {
        statuses.add(rootStatus);
    } else {
        // Recursively collect every file underneath the directory.
        statuses.addAll(DrillFileSystemUtil.listFiles(fs, path, true));
    }
    if (timer != null) {
        logger.debug("Took {} ms to get file statuses", timer.elapsed(TimeUnit.MILLISECONDS));
        timer.reset();
        timer.start();
    }
    // Every file maps to the same FileSystem; LinkedHashMap keeps insertion order,
    // matching the ordering the previous stream-based construction produced.
    Map<FileStatus, FileSystem> statusToFs = new LinkedHashMap<>();
    for (FileStatus status : statuses) {
        statusToFs.put(status, fs);
    }
    ParquetTableMetadata_v4 tableMetadata = getParquetTableMetadata(statusToFs);
    if (timer != null) {
        logger.debug("Took {} ms to read file metadata", timer.elapsed(TimeUnit.MILLISECONDS));
        timer.stop();
    }
    return tableMetadata;
}
Also used : TimedCallable(org.apache.drill.exec.store.TimedCallable) ParquetTableMetadata_v4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ParquetTableMetadata_v4) FileSystem(org.apache.hadoop.fs.FileSystem) LoggerFactory(org.slf4j.LoggerFactory) FileStatus(org.apache.hadoop.fs.FileStatus) DeserializationFeature(com.fasterxml.jackson.databind.DeserializationFeature) SimpleModule(com.fasterxml.jackson.databind.module.SimpleModule) Pair(org.apache.commons.lang3.tuple.Pair) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) SHORT_PREFIX_STYLE(org.apache.commons.lang3.builder.ToStringStyle.SHORT_PREFIX_STYLE) ParquetTableMetadataBase(org.apache.drill.exec.store.parquet.metadata.MetadataBase.ParquetTableMetadataBase) Collectors(org.apache.drill.common.collections.Collectors) SchemaPath(org.apache.drill.common.expression.SchemaPath) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) RowGroupMetadata(org.apache.drill.exec.store.parquet.metadata.MetadataBase.RowGroupMetadata) AfterburnerModule(com.fasterxml.jackson.module.afterburner.AfterburnerModule) DrillFileSystemUtil(org.apache.drill.exec.util.DrillFileSystemUtil) List(java.util.List) FileMetadata(org.apache.drill.exec.store.parquet.metadata.Metadata_V4.FileMetadata) ToStringBuilder(org.apache.commons.lang3.builder.ToStringBuilder) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) HadoopInputFile(org.apache.parquet.hadoop.util.HadoopInputFile) ParquetFileMetadata_v4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ParquetFileMetadata_v4) ParquetReaderConfig(org.apache.drill.exec.store.parquet.ParquetReaderConfig) ParquetFileMetadata(org.apache.drill.exec.store.parquet.metadata.MetadataBase.ParquetFileMetadata) ParquetFileAndRowCountMetadata(org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ParquetFileAndRowCountMetadata) 
Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LinkedHashMap(java.util.LinkedHashMap) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) ImpersonationUtil(org.apache.drill.exec.util.ImpersonationUtil) PathSerDe(org.apache.drill.exec.serialization.PathSerDe) ColumnMetadata_v4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ColumnMetadata_v4) OutputStream(java.io.OutputStream) DrillVersionInfo(org.apache.drill.common.util.DrillVersionInfo) Logger(org.slf4j.Logger) JsonParser(com.fasterxml.jackson.core.JsonParser) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) SUPPORTED_VERSIONS(org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.SUPPORTED_VERSIONS) IOException(java.io.IOException) MetadataContext(org.apache.drill.exec.store.dfs.MetadataContext) ParquetFileReader(org.apache.parquet.hadoop.ParquetFileReader) TimeUnit(java.util.concurrent.TimeUnit) JsonFactory(com.fasterxml.jackson.core.JsonFactory) Feature(com.fasterxml.jackson.core.JsonGenerator.Feature) Lists(org.apache.drill.shaded.guava.com.google.common.collect.Lists) ColumnTypeMetadata_v4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ColumnTypeMetadata_v4) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) MetadataSummary(org.apache.drill.exec.store.parquet.metadata.Metadata_V4.MetadataSummary) InputStream(java.io.InputStream) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) ArrayList(java.util.ArrayList) ParquetTableMetadata_v4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ParquetTableMetadata_v4)

Example 40 with Stopwatch

use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

the class PageReader method readCompressedPageV1.

/**
 * Reads a compressed v1 data page or a dictionary page, both of which are compressed
 * in their entirety.
 * @return decompressed Parquet page data
 * @throws IOException
 */
/**
 * Reads a compressed v1 data page or a dictionary page, both of which are compressed
 * in their entirety.
 *
 * @return decompressed Parquet page data (ownership passes to the caller)
 * @throws IOException if reading or decompressing the page fails
 */
protected DrillBuf readCompressedPageV1() throws IOException {
    Stopwatch timer = Stopwatch.createUnstarted();
    int inputSize = pageHeader.getCompressed_page_size();
    int outputSize = pageHeader.getUncompressed_page_size();
    long start = dataReader.getPos();
    long timeToRead;
    DrillBuf inputPageData = null;
    DrillBuf outputPageData = this.allocator.buffer(outputSize);
    boolean success = false;
    try {
        timer.start();
        inputPageData = dataReader.getNext(inputSize);
        timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);
        this.updateStats(pageHeader, "Page Read", start, timeToRead, inputSize, inputSize);
        timer.reset();
        timer.start();
        start = dataReader.getPos();
        CompressionCodecName codecName = columnChunkMetaData.getCodec();
        BytesInputDecompressor decomp = codecFactory.getDecompressor(codecName);
        ByteBuffer input = inputPageData.nioBuffer(0, inputSize);
        ByteBuffer output = outputPageData.nioBuffer(0, outputSize);
        decomp.decompress(input, inputSize, output, outputSize);
        outputPageData.writerIndex(outputSize);
        timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);
        if (logger.isTraceEnabled()) {
            logger.trace("Col: {}  readPos: {}  Uncompressed_size: {}  pageData: {}", columnChunkMetaData.toString(), dataReader.getPos(), outputSize, ByteBufUtil.hexDump(outputPageData));
        }
        this.updateStats(pageHeader, "Decompress", start, timeToRead, inputSize, outputSize);
        success = true;
    } finally {
        if (inputPageData != null) {
            inputPageData.release();
        }
        // Fix: release the output buffer if read/decompress threw before we could
        // return it, otherwise the allocated DrillBuf leaks on the error path.
        if (!success) {
            outputPageData.release();
        }
    }
    return outputPageData;
}
Also used : CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) BytesInputDecompressor(org.apache.parquet.compression.CompressionCodecFactory.BytesInputDecompressor) ByteBuffer(java.nio.ByteBuffer) DrillBuf(io.netty.buffer.DrillBuf)

Aggregations

Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch)68 IOException (java.io.IOException)13 Path (org.apache.hadoop.fs.Path)12 ArrayList (java.util.ArrayList)8 DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException)8 FileStatus (org.apache.hadoop.fs.FileStatus)8 DrillBuf (io.netty.buffer.DrillBuf)7 ByteBuffer (java.nio.ByteBuffer)7 SchemaPath (org.apache.drill.common.expression.SchemaPath)7 HashMap (java.util.HashMap)5 RelNode (org.apache.calcite.rel.RelNode)5 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)4 DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint)4 VectorContainer (org.apache.drill.exec.record.VectorContainer)4 SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4)4 ValueVector (org.apache.drill.exec.vector.ValueVector)4 CompressionCodecName (org.apache.parquet.hadoop.metadata.CompressionCodecName)4 File (java.io.File)3 ResultSet (java.sql.ResultSet)3 ResultSetMetaData (java.sql.ResultSetMetaData)3