
Example 21 with AfterburnerModule

Use of com.fasterxml.jackson.module.afterburner.AfterburnerModule in project drill by apache.

The class Metadata, method getSummary.

/**
 * Reads the summary from the metadata cache file; if the cache file is stale, recreates the metadata
 * @param fs file system
 * @param metadataParentDir parent directory that holds metadata files
 * @param autoRefreshTriggered true if the auto-refresh is already triggered
 * @param readerConfig Parquet reader config
 * @return the metadata summary, or null if the metadata does not exist or cannot be read
 */
public static Metadata_V4.MetadataSummary getSummary(FileSystem fs, Path metadataParentDir, boolean autoRefreshTriggered, ParquetReaderConfig readerConfig) {
    Path summaryFile = getSummaryFileName(metadataParentDir);
    Path metadataDirFile = getDirFileName(metadataParentDir);
    MetadataContext metaContext = new MetadataContext();
    try {
        // If autoRefresh is not triggered and none of the metadata files exist
        if (!autoRefreshTriggered && !metadataExists(fs, metadataParentDir)) {
            logger.debug("Metadata doesn't exist in {}", metadataParentDir);
            return null;
        } else if (autoRefreshTriggered && !fs.exists(summaryFile)) {
            logger.debug("Metadata Summary file {} does not exist", summaryFile);
            return null;
        } else {
            // If auto-refresh is not triggered, check whether the cache file is stale and, if so, trigger auto-refresh
            if (!autoRefreshTriggered) {
                Metadata metadata = new Metadata(readerConfig);
                if (!fs.exists(metadataDirFile)) {
                    return null;
                }
                ParquetTableMetadataDirs metadataDirs = readMetadataDirs(fs, metadataDirFile, metaContext, readerConfig);
                if (metadata.tableModified(metadataDirs.getDirectories(), summaryFile, metadataParentDir, metaContext, fs)) {
                    ParquetTableMetadata_v4 parquetTableMetadata = (metadata.createMetaFilesRecursivelyAsProcessUser(Path.getPathWithoutSchemeAndAuthority(summaryFile.getParent()), fs, true, null, true)).getLeft();
                    return parquetTableMetadata.getSummary();
                }
            }
            // Read the existing metadataSummary cache file to get the metadataSummary
            ObjectMapper mapper = new ObjectMapper();
            final SimpleModule serialModule = new SimpleModule();
            serialModule.addDeserializer(SchemaPath.class, new SchemaPath.De());
            serialModule.addKeyDeserializer(ColumnTypeMetadata_v4.Key.class, new ColumnTypeMetadata_v4.Key.DeSerializer());
            AfterburnerModule module = new AfterburnerModule();
            module.setUseOptimizedBeanDeserializer(true);
            mapper.registerModule(serialModule);
            mapper.registerModule(module);
            mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
            try (InputStream is = fs.open(summaryFile)) {
                return mapper.readValue(is, Metadata_V4.MetadataSummary.class);
            }
        }
    } catch (IOException e) {
        logger.debug("Failed to read '{}' summary metadata file", summaryFile, e);
        return null;
    }
}
Also used: Path (org.apache.hadoop.fs.Path), SchemaPath (org.apache.drill.common.expression.SchemaPath), PathSerDe (org.apache.drill.exec.serialization.PathSerDe), ColumnTypeMetadata_v4 (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ColumnTypeMetadata_v4), InputStream (java.io.InputStream), RowGroupMetadata (org.apache.drill.exec.store.parquet.metadata.MetadataBase.RowGroupMetadata), FileMetadata (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.FileMetadata), ParquetFileMetadata (org.apache.drill.exec.store.parquet.metadata.MetadataBase.ParquetFileMetadata), ParquetFileAndRowCountMetadata (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ParquetFileAndRowCountMetadata), ParquetMetadata (org.apache.parquet.hadoop.metadata.ParquetMetadata), ParquetTableMetadata_v4 (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ParquetTableMetadata_v4), IOException (java.io.IOException), AfterburnerModule (com.fasterxml.jackson.module.afterburner.AfterburnerModule), MetadataContext (org.apache.drill.exec.store.dfs.MetadataContext), MetadataSummary (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.MetadataSummary), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), SimpleModule (com.fasterxml.jackson.databind.module.SimpleModule)
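Most of getSummary above is Drill-specific filesystem handling; the Jackson-relevant part is the mapper configuration. Below is a minimal, self-contained sketch of that same AfterburnerModule setup, using a hypothetical Summary POJO in place of Metadata_V4.MetadataSummary (class and field names are illustrative; only jackson-databind and jackson-module-afterburner are assumed to be on the classpath).

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.module.afterburner.AfterburnerModule;

public class AfterburnerReadExample {

    // Hypothetical stand-in for Metadata_V4.MetadataSummary
    public static class Summary {
        public String metadataVersion;
        public long totalRowCount;
    }

    public static void main(String[] args) throws Exception {
        // Same pattern as getSummary: Afterburner with optimized bean
        // deserialization, and unknown JSON properties ignored.
        AfterburnerModule afterburner = new AfterburnerModule();
        afterburner.setUseOptimizedBeanDeserializer(true);

        ObjectMapper mapper = new ObjectMapper();
        mapper.registerModule(afterburner);
        mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

        String json = "{\"metadataVersion\":\"4\",\"totalRowCount\":42,\"extraField\":true}";
        Summary summary = mapper.readValue(json, Summary.class);
        System.out.println(summary.metadataVersion + " / " + summary.totalRowCount);
    }
}

Disabling FAIL_ON_UNKNOWN_PROPERTIES keeps older readers tolerant of newer cache files that carry extra fields, which is presumably why the Drill code configures the mapper the same way.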

Aggregations

AfterburnerModule (com.fasterxml.jackson.module.afterburner.AfterburnerModule): 21
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 18
SimpleModule (com.fasterxml.jackson.databind.module.SimpleModule): 4
Jdk8Module (com.fasterxml.jackson.datatype.jdk8.Jdk8Module): 4
JavaTimeModule (com.fasterxml.jackson.datatype.jsr310.JavaTimeModule): 4
SchemaPath (org.apache.drill.common.expression.SchemaPath): 4
Path (org.apache.hadoop.fs.Path): 4
JacksonPayloadSerializer (co.elastic.apm.report.serialize.JacksonPayloadSerializer): 3
JsonFactory (com.fasterxml.jackson.core.JsonFactory): 3
JodaModule (com.fasterxml.jackson.datatype.joda.JodaModule): 3
MediaContent (data.media.MediaContent): 3
IOException (java.io.IOException): 3
SerFeatures (serializers.SerFeatures): 3
XmlMapper (com.fasterxml.jackson.dataformat.xml.XmlMapper): 2
Stopwatch (com.google.common.base.Stopwatch): 2
InputStream (java.io.InputStream): 2
ParquetFileMetadata (org.apache.drill.exec.store.parquet.metadata.MetadataBase.ParquetFileMetadata): 2
ColumnTypeMetadata_v4 (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ColumnTypeMetadata_v4): 2
FileMetadata (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.FileMetadata): 2
MetadataSummary (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.MetadataSummary): 2
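The tally above shows AfterburnerModule frequently appearing alongside Jdk8Module and JavaTimeModule on the same ObjectMapper. A short sketch of such a combined registration is below; the Event class is hypothetical (not taken from any of the indexed projects), and the jackson-datatype-jdk8 and jackson-datatype-jsr310 artifacts are assumed to be available.

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import com.fasterxml.jackson.module.afterburner.AfterburnerModule;

import java.time.Instant;
import java.util.Optional;

public class CombinedModulesExample {

    // Hypothetical POJO, not taken from any of the indexed projects
    public static class Event {
        public Optional<String> label = Optional.of("startup");
        public Instant at = Instant.parse("2023-01-01T00:00:00Z");
    }

    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper()
                .registerModules(new AfterburnerModule(), new Jdk8Module(), new JavaTimeModule())
                .disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS);

        // Optional<> and java.time values serialize naturally, while Afterburner
        // generates faster (de)serializers behind the scenes.
        System.out.println(mapper.writeValueAsString(new Event()));
    }
}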